Diffstat (limited to 'gcc-4.2.1-5666.3/gcc/config/i386')
41 files changed, 67834 insertions, 0 deletions
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h
new file mode 100644
index 000000000..8a466d914
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h
@@ -0,0 +1,106 @@
+/* APPLE LOCAL file 5612787 mainline sse4 */
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+/* Implemented from the specification included in the AMD Programmers
+   Manual Update, version 2.x */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4A__
+# error "SSE4A instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files */
+#include <pmmintrin.h>
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+__STATIC_INLINE void __attribute__((__always_inline__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+  __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+__STATIC_INLINE void __attribute__((__always_inline__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+  __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
+{
+  return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
+}
+#else
+#define _mm_extracti_si64(X, I, L) \
+  ((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+#endif
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_insert_si64 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_inserti_si64 (__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
+{
+  return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
+}
+#else
+#define _mm_inserti_si64(X, Y, I, L) \
+  ((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+#endif
+
+#endif /* __SSE4A__ */
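
A minimal usage sketch for the intrinsics above (an aside, not part of the patch). It assumes a compiler with SSE4A support, such as one built from this tree, compiled with the SSE4A instruction set enabled (mainline GCC spells this -msse4a), and an x86-64 target so that _mm_set_epi64x and _mm_cvtsi128_si64 are available from <emmintrin.h>:

#include <ammintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128i x = _mm_set_epi64x (0, 0x1122334455667788LL);

  /* EXTRQ: extract 16 bits starting at bit 8 of the low quadword,
     zero-extended into the low quadword of the result -- 0x6677.  */
  __m128i lo = _mm_extracti_si64 (x, 16, 8);

  printf ("%llx\n", (unsigned long long) _mm_cvtsi128_si64 (lo));
  return 0;
}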
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/athlon.md b/gcc-4.2.1-5666.3/gcc/config/i386/athlon.md
new file mode 100644
index 000000000..6d92b948b
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/athlon.md
@@ -0,0 +1,874 @@
+;; AMD Athlon Scheduling
+;;
+;; The Athlon contains three pipelined FP units, three integer units and
+;; three address generation units.
+;;
+;; The predecode logic determines the boundaries of instructions within the
+;; 64 byte cache line, so the cache line straddling problem of the K6 might
+;; be an issue here as well, although it is not noted in the documentation.
+;;
+;; Three DirectPath instruction decoders and only one VectorPath decoder
+;; are available.  They can decode three DirectPath instructions or one
+;; VectorPath instruction per cycle.
+;; Decoded macro instructions are then passed to the 72 entry instruction
+;; control unit, which passes them to the specialized integer (18 entry)
+;; and fp (36 entry) schedulers.
+;;
+;; The load/store queue unit is not attached to the schedulers but
+;; communicates with all the execution units separately instead.
+
+(define_attr "athlon_decode" "direct,vector,double"
+  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
+           (const_string "vector")
+         (and (eq_attr "type" "push")
+              (match_operand 1 "memory_operand" ""))
+           (const_string "vector")
+         (and (eq_attr "type" "fmov")
+              (and (eq_attr "memory" "load,store")
+                   (eq_attr "mode" "XF")))
+           (const_string "vector")]
+        (const_string "direct")))
+
+;;
+;;           decode0 decode1 decode2
+;;                 \    |    /
+;;    instruction control unit (72 entry scheduler)
+;;                  |    |
+;;      integer scheduler (18)        stack map
+;;     /  |    |    |    |   \        stack rename
+;;  ieu0 agu0 ieu1 agu1 ieu2 agu2     scheduler
+;;   |   agu0  |   agu1      agu2     register file
+;;   |      \  |    |   /      |    |     |
+;;    \     /\ |    |  /      fadd fmul fstore
+;;     \   /  \|    | /       fadd fmul fstore
+;;   imul  load/store (2x)    fadd fmul fstore
+
+(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_cpu_unit "athlon-decodev" "athlon")
+;; Model the fact that a double decoded instruction may take 2 cycles
+;; to decode when decoder2, and decoder0 in the next cycle, are used
+;; (this is needed to allow a throughput of 1.5 double decoded
+;; instructions per cycle).
+;;
+;; In order to avoid a dependence between the reservation of the decoder
+;; and other units, we model the decoder as a two stage fully pipelined
+;; unit; only a double decoded instruction may occupy the unit in the
+;; first cycle.  With this scheme, however, two double instructions could
+;; be issued in cycle 0.
+;;
+;; Avoid this by using a presence set requiring decoder0 to be allocated
+;; too.  Vector decoded instructions then can't be issued when
+;; modeled as consuming decoder0+decoder1+decoder2.
+;; We solve that with a specialized vector decoder unit and an
+;; exclusion set.
+(presence_set "athlon-decode2" "athlon-decode0")
+(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
+(define_reservation "athlon-vector" "nothing,athlon-decodev")
+(define_reservation "athlon-direct0" "nothing,athlon-decode0")
+(define_reservation "athlon-direct" "nothing,
+                                     (athlon-decode0 | athlon-decode1
+                                      | athlon-decode2)")
+;; Double instructions behave like two direct instructions.
+(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
+                                      | (nothing,(athlon-decode0 + athlon-decode1))
+                                      | (nothing,(athlon-decode1 + athlon-decode2)))")
+
+;; Modeling the agu and ieu units results in extremely large automata,
+;; and in our approximation they are hardly ever filled.  Only the ieu
+;; unit can be, as the issue rate is 3 and the agu unit is always used
+;; first in the insn reservations.  Skip these models.
+
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+(define_cpu_unit "athlon-ieu0" "athlon")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing")
+
+(define_cpu_unit "athlon-mult" "athlon_mult")
+
+(define_cpu_unit "athlon-load0" "athlon_load")
+(define_cpu_unit "athlon-load1" "athlon_load")
+(define_reservation "athlon-load" "athlon-agu,
+                                   (athlon-load0 | athlon-load1),nothing")
+;; 128bit SSE instructions issue two loads at once.
+(define_reservation "athlon-load2" "athlon-agu,
+                                    (athlon-load0 + athlon-load1),nothing")
+
+(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
+;; 128bit SSE instructions issue two stores at once.
+(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
+
+
+;; The FP operations start to execute at stage 12 in the pipeline, while
+;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
+;; Compensate for the difference for Athlon because it results in
+;; significantly smaller automata.
+(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
+;; The floating point loads.
+(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
+(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
+
+
+;; The three fp units are fully pipelined with a latency of 3.
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
+
+;; Vector operations usually consume many of the pipes.
+(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
+
+
+;; Jump instructions are executed in the branch unit, completely
+;; transparently to us.
+(define_insn_reservation "athlon_branch" 0
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "ibr"))
+      "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_call" 0
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "call,callv"))
+      "athlon-vector,athlon-ieu")
+
+;; The latency of the push operation is 3 cycles, but the ESP value is
+;; available earlier.
+(define_insn_reservation "athlon_push" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "push"))
+      "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "pop"))
+      "athlon-vector,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_pop_k8" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "pop"))
+      "athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave" 3
+      (and (eq_attr "cpu" "athlon")
+           (eq_attr "type" "leave"))
+      "athlon-vector,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave_k8" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "leave"))
+      "athlon-double,(athlon-ieu+athlon-load)")
+
+;; Lea executes in the AGU unit with a latency of 2 cycles.
+(define_insn_reservation "athlon_lea" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "lea"))
+      "athlon-direct,athlon-agu,nothing")
+
+;; Mul executes in a special multiplier unit attached to IEU0.
+(define_insn_reservation "athlon_imul" 5
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "imul")
+                (eq_attr "memory" "none,unknown")))
+      "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
+;; ??? Widening multiply is vector or double.
+(define_insn_reservation "athlon_imul_k8_DI" 4
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "imul")
+                (and (eq_attr "mode" "DI")
+                     (eq_attr "memory" "none,unknown"))))
+      "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_k8" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "imul")
+                (eq_attr "memory" "none,unknown")))
+      "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_mem" 8
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "imul")
+                (eq_attr "memory" "load,both")))
+      "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8_DI" 7
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "imul")
+                (and (eq_attr "mode" "DI")
+                     (eq_attr "memory" "load,both"))))
+      "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8" 6
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "imul")
+                (eq_attr "memory" "load,both")))
+      "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
+
+;; Idiv cannot execute in parallel with other instructions.  Treating it
+;; as a short latency vector instruction is a good approximation that
+;; keeps the scheduler from trying too hard to hide its latency by
+;; overlapping it with other instructions.
+;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
+;; of the other code.
+
+(define_insn_reservation "athlon_idiv" 6
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "idiv")
+                (eq_attr "memory" "none,unknown")))
+      "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
+(define_insn_reservation "athlon_idiv_mem" 9
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "idiv")
+                (eq_attr "memory" "load,both")))
+      "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
+;; The parallelism of string instructions is not documented.  Model it the
+;; same way as idiv to create smaller automata.  This probably does not
+;; matter much.
+(define_insn_reservation "athlon_str" 6
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "str")
+                (eq_attr "memory" "load,both,store")))
+      "athlon-vector,athlon-load,athlon-ieu0*6")
+
+(define_insn_reservation "athlon_idirect" 1
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "direct")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "none,unknown"))))
+      "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "vector")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "none,unknown"))))
+      "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_loadmov" 3
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "imov")
+                (eq_attr "memory" "load")))
+      "athlon-direct,athlon-load")
+(define_insn_reservation "athlon_idirect_load" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "direct")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "vector")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "load"))))
+      "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_movstore" 1
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "imov")
+                (eq_attr "memory" "store")))
+      "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_idirect_both" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "direct")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "both"))))
+      "athlon-direct,athlon-load,
+       athlon-ieu,athlon-store,
+       athlon-store")
+(define_insn_reservation "athlon_ivector_both" 6
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "vector")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "both"))))
+      "athlon-vector,athlon-load,
+       athlon-ieu,
+       athlon-ieu,
+       athlon-store")
+(define_insn_reservation "athlon_idirect_store" 1
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "direct")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "store"))))
+      "athlon-direct,(athlon-ieu+athlon-agu),
+       athlon-store")
+(define_insn_reservation "athlon_ivector_store" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "athlon_decode" "vector")
+                (and (eq_attr "unit" "integer,unknown")
+                     (eq_attr "memory" "store"))))
+      "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+       athlon-store")
+
+;; Athlon floating point unit.
+(define_insn_reservation "athlon_fldxf" 12
+      (and (eq_attr "cpu" "athlon")
+           (and
(eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + "athlon-vector,athlon-fpload2,athlon-fvector*9") +(define_insn_reservation "athlon_fldxf_k8" 13 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + "athlon-vector,athlon-fpload2k8,athlon-fvector*9") +;; Assume superforwarding to take place so effective latency of fany op is 0. +(define_insn_reservation "athlon_fld" 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fany") +(define_insn_reservation "athlon_fld_k8" 2 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fstore") + +(define_insn_reservation "athlon_fstxf" 10 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "store,both") + (eq_attr "mode" "XF")))) + "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))") +(define_insn_reservation "athlon_fstxf_k8" 8 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "store,both") + (eq_attr "mode" "XF")))) + "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))") +(define_insn_reservation "athlon_fst" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_fst_k8" 2 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_fist" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fistp,fisttp")) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_fmov" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fmov")) + "athlon-direct,athlon-fpsched,athlon-faddmul") +(define_insn_reservation "athlon_fadd_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fop") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_fadd_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fop") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_fadd" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fop")) + "athlon-direct,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_fmul_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fmul") +(define_insn_reservation "athlon_fmul_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul") +(define_insn_reservation "athlon_fmul" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fmul")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fsgn" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fsgn")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fdiv_load" 24 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) + 
"athlon-direct,athlon-fpload,athlon-fmul") +(define_insn_reservation "athlon_fdiv_load_k8" 13 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul") +(define_insn_reservation "athlon_fdiv" 24 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fdiv")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fdiv_k8" 11 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "fdiv")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fpspc_load" 103 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "fpspc") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload,athlon-fvector") +(define_insn_reservation "athlon_fpspc" 100 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fpspc")) + "athlon-vector,athlon-fpsched,athlon-fvector") +(define_insn_reservation "athlon_fcmov_load" 7 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmov") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload,athlon-fvector") +(define_insn_reservation "athlon_fcmov" 7 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmov")) + "athlon-vector,athlon-fpsched,athlon-fvector") +(define_insn_reservation "athlon_fcmov_load_k8" 17 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fcmov") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fploadk8,athlon-fvector") +(define_insn_reservation "athlon_fcmov_k8" 15 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "fcmov")) + "athlon-vector,athlon-fpsched,athlon-fvector") +;; fcomi is vector decoded by uses only one pipe. +(define_insn_reservation "athlon_fcomi_load" 3 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_fcomi_load_k8" 5 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fcmp") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_fcomi" 3 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "type" "fcmp"))) + "athlon-vector,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_fcom_load" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_fcom_load_k8" 4 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_fcom" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "fcmp")) + "athlon-direct,athlon-fpsched,athlon-fadd") +;; Never seen by the scheduler because we still don't do post reg-stack +;; scheduling. 
+;(define_insn_reservation "athlon_fxch" 2
+;      (and (eq_attr "cpu" "athlon,k8,generic64")
+;           (eq_attr "type" "fxch"))
+;      "athlon-direct,athlon-fpsched,athlon-fany")
+
+;; Athlon handles MMX operations in the FPU unit with shorter latencies.
+
+(define_insn_reservation "athlon_movlpd_load" 0
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssemov")
+                (match_operand:DF 1 "memory_operand" "")))
+      "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_movlpd_load_k8" 2
+      (and (eq_attr "cpu" "k8")
+           (and (eq_attr "type" "ssemov")
+                (match_operand:DF 1 "memory_operand" "")))
+      "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_movsd_load_generic64" 2
+      (and (eq_attr "cpu" "generic64")
+           (and (eq_attr "type" "ssemov")
+                (match_operand:DF 1 "memory_operand" "")))
+      "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
+(define_insn_reservation "athlon_movaps_load_k8" 2
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssemov")
+                (and (eq_attr "mode" "V4SF,V2DF,TI")
+                     (eq_attr "memory" "load"))))
+      "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_movaps_load" 0
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssemov")
+                (and (eq_attr "mode" "V4SF,V2DF,TI")
+                     (eq_attr "memory" "load"))))
+      "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
+(define_insn_reservation "athlon_movss_load" 1
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssemov")
+                (and (eq_attr "mode" "SF,DI")
+                     (eq_attr "memory" "load"))))
+      "athlon-vector,athlon-fpload,(athlon-fany*2)")
+(define_insn_reservation "athlon_movss_load_k8" 1
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssemov")
+                (and (eq_attr "mode" "SF,DI")
+                     (eq_attr "memory" "load"))))
+      "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
+(define_insn_reservation "athlon_mmxsseld" 0
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "mmxmov,ssemov")
+                (eq_attr "memory" "load")))
+      "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_mmxsseld_k8" 2
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "mmxmov,ssemov")
+                (eq_attr "memory" "load")))
+      "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_mmxssest" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "mmxmov,ssemov")
+                (and (eq_attr "mode" "V4SF,V2DF,TI")
+                     (eq_attr "memory" "store,both"))))
+      "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_k8" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "mmxmov,ssemov")
+                (and (eq_attr "mode" "V4SF,V2DF,TI")
+                     (eq_attr "memory" "store,both"))))
+      "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_short" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "mmxmov,ssemov")
+                (eq_attr "memory" "store,both")))
+      "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_movaps_k8" 2
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssemov")
+                (eq_attr "mode" "V4SF,V2DF,TI")))
+      "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
+(define_insn_reservation "athlon_movaps" 2
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssemov")
+                (eq_attr "mode" "V4SF,V2DF,TI")))
+      "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
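
Since the reservation strings above pack a lot of meaning into little punctuation, here is a hypothetical reservation (not part of the patch) annotated with the semantics of the DFA operators used throughout this file:

;; Illustrative only.  In a reservation string, "," advances to the
;; next cycle, "+" claims several units in the same cycle, "|" uses
;; whichever alternative is free, "unit*N" holds a unit for N
;; consecutive cycles, and "nothing" reserves no unit for that cycle.
(define_insn_reservation "example_fop" 5   ; result ready after 5 cycles
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fop"))
  "athlon-direct,(athlon-fadd|athlon-fmul),nothing,athlon-fstore*2")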
+(define_insn_reservation "athlon_mmxssemov" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "mmxmov,ssemov"))
+      "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "mmxmul")
+                (eq_attr "memory" "load")))
+      "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "mmxmul"))
+      "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 3
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "unit" "mmx")
+                (eq_attr "memory" "load")))
+      "athlon-direct,athlon-fpload,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "unit" "mmx"))
+      "athlon-direct,athlon-fpsched,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well.  For scalar
+;; operations the latency is the same as for i387 operations.
+
+(define_insn_reservation "athlon_sselog_load" 3
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "sselog,sselog1")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_k8" 5
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "sselog,sselog1")
+                (eq_attr "memory" "load")))
+      "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog" 3
+      (and (eq_attr "cpu" "athlon")
+           (eq_attr "type" "sselog,sselog1"))
+      "athlon-vector,athlon-fpsched,athlon-fmul*2")
+(define_insn_reservation "athlon_sselog_k8" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "sselog,sselog1"))
+      "athlon-double,athlon-fpsched,athlon-fmul")
+;; ??? pcmp executes in addmul; probably not worthwhile to bother about that.
+(define_insn_reservation "athlon_ssecmp_load" 2
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssecmp")
+                (and (eq_attr "mode" "SF,DF,DI")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp_load_k8" 4
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssecmp")
+                (and (eq_attr "mode" "SF,DF,DI,TI")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssecmp")
+                (eq_attr "mode" "SF,DF,DI,TI")))
+      "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 3
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssecmp")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssecmp")
+                (eq_attr "memory" "load")))
+      "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector" 3
+      (and (eq_attr "cpu" "athlon")
+           (eq_attr "type" "ssecmp"))
+      "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_k8" 3
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "ssecmp"))
+      "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecomi_load" 4
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssecomi")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_k8" 6
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssecomi")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (eq_attr "type" "ssecmp"))
+      "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 4
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "sseadd")
+                (and (eq_attr "mode" "SF,DF,DI")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load_k8" 6
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "sseadd")
+                (and (eq_attr "mode" "SF,DF,DI")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "sseadd")
+                (eq_attr "mode" "SF,DF,DI")))
+      "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 5
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "sseadd")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_k8" 7
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "sseadd")
+                (eq_attr "memory" "load")))
+      "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector" 5
+      (and (eq_attr "cpu" "athlon")
+           (eq_attr "type" "sseadd"))
+      "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_k8" 5
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "sseadd"))
+      "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+
+;; Conversions behave very irregularly and the scheduling is critical here.
+;; Take each instruction separately.  Assume that the mode is always set to
+;; the destination one and athlon_decode is set to the K8 versions.
+
+;; cvtss2sd
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
+      (and (eq_attr "cpu" "k8,athlon,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "direct")
+                     (and (eq_attr "mode" "DF")
+                          (eq_attr "memory" "load")))))
+      "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "direct")
+                     (eq_attr "mode" "DF"))))
+      "athlon-direct,athlon-fpsched,athlon-fstore")
+;; cvtps2pd.  Model the same way as the other double decoded FP conversions.
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
+      (and (eq_attr "cpu" "k8,athlon,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "V2DF,V4SF,TI")
+                          (eq_attr "memory" "load")))))
+      "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
+      (and (eq_attr "cpu" "k8,athlon,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (eq_attr "mode" "V2DF,V4SF,TI"))))
+      "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath).
+;; cvtsi2sd has a throughput of 1 and is executed in the store unit with a
+;; latency of 6.
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
+      (and (eq_attr "cpu" "athlon,k8")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "direct")
+                     (and (eq_attr "mode" "SF,DF")
+                          (eq_attr "memory" "load")))))
+      "athlon-direct,athlon-fploadk8,athlon-fstore")
+;; cvtsi2ss mem,reg is doublepath.
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "SF,DF")
+                          (eq_attr "memory" "load")))))
+      "athlon-vector,athlon-fpload,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "SF,DF")
+                          (eq_attr "memory" "load")))))
+      "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+;; cvtsi2sd reg,reg is double decoded (vector on Athlon).
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
+      (and (eq_attr "cpu" "k8,athlon,generic64")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "SF,DF")
+                          (eq_attr "memory" "none")))))
+      "athlon-double,athlon-fploadk8,athlon-fstore")
+;; cvtsi2ss reg,reg is doublepath.
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "vector")
+                     (and (eq_attr "mode" "SF,DF")
+                          (eq_attr "memory" "none")))))
+      "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9.
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
+      (and (eq_attr "cpu" "k8,athlon,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "SF")
+                          (eq_attr "memory" "load")))))
+      "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12.
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "vector")
+                     (and (eq_attr "mode" "SF")
+                          (eq_attr "memory" "none")))))
+      "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "vector")
+                     (and (eq_attr "mode" "V4SF,V2DF,TI")
+                          (eq_attr "memory" "load")))))
+      "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10.
+;; ??? Why is it faster than cvtsd2ss?
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssecvt")
+                (and (eq_attr "athlon_decode" "vector")
+                     (and (eq_attr "mode" "V4SF,V2DF,TI")
+                          (eq_attr "memory" "none")))))
+      "athlon-vector,athlon-fpsched,athlon-fvector*2")
+;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9.
+(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "vector")
+                     (and (eq_attr "mode" "SI,DI")
+                          (eq_attr "memory" "load")))))
+      "athlon-vector,athlon-fploadk8,athlon-fvector")
+;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9.
+(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "SI,DI")
+                          (eq_attr "memory" "none")))))
+      "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "sseicvt")
+                (and (eq_attr "athlon_decode" "double")
+                     (and (eq_attr "mode" "SI,DI")
+                          (eq_attr "memory" "none")))))
+      "athlon-double,athlon-fpsched,athlon-fstore")
+
+
+(define_insn_reservation "athlon_ssemul_load" 4
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssemul")
+                (and (eq_attr "mode" "SF,DF")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_ssemul_load_k8" 6
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssemul")
+                (and (eq_attr "mode" "SF,DF")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssemul")
+                (eq_attr "mode" "SF,DF")))
+      "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 5
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssemul")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_k8" 7
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssemul")
+                (eq_attr "memory" "load")))
+      "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector" 5
+      (and (eq_attr "cpu" "athlon")
+           (eq_attr "type" "ssemul"))
+      "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_k8" 5
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "ssemul"))
+      "athlon-double,athlon-fpsched,(athlon-fmul*2)")
+;; divsd timings.  divss is faster.
+(define_insn_reservation "athlon_ssediv_load" 20
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssediv")
+                (and (eq_attr "mode" "SF,DF")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fpload,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv_load_k8" 22
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssediv")
+                (and (eq_attr "mode" "SF,DF")
+                     (eq_attr "memory" "load"))))
+      "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv" 20
+      (and (eq_attr "cpu" "athlon,k8,generic64")
+           (and (eq_attr "type" "ssediv")
+                (eq_attr "mode" "SF,DF")))
+      "athlon-direct,athlon-fpsched,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector_load" 39
+      (and (eq_attr "cpu" "athlon")
+           (and (eq_attr "type" "ssediv")
+                (eq_attr "memory" "load")))
+      "athlon-vector,athlon-fpload2,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_k8" 35
+      (and (eq_attr "cpu" "k8,generic64")
+           (and (eq_attr "type" "ssediv")
+                (eq_attr "memory" "load")))
+      "athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector" 39
+      (and (eq_attr "cpu" "athlon")
+           (eq_attr "type" "ssediv"))
+      "athlon-vector,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_k8" 39
+      (and (eq_attr "cpu" "k8,generic64")
+           (eq_attr "type" "ssediv"))
+      "athlon-double,athlon-fmul*34")
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h b/gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h
new file mode 100644
index 000000000..46a55b0d1
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h
@@ -0,0 +1,25 @@
+/* Make configure files produce a biarch compiler defaulting to 64-bit mode.
+   This file must be included very first, while the OS specific file comes
+   later to override otherwise wrong defaults.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Contributed by Bo Thorsen <bo@suse.de>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+#define TARGET_64BIT_DEFAULT MASK_64BIT
+#define TARGET_BI_ARCH 1
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/constraints.md b/gcc-4.2.1-5666.3/gcc/config/i386/constraints.md
new file mode 100644
index 000000000..5d76ac523
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/constraints.md
@@ -0,0 +1,171 @@
+;; Constraint definitions for IA-32 and x86-64.
+;; Copyright (C) 2006 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;;; Unused letters: +;;; B H TU W +;;; h jk vw z + +;; Integer register constraints. +;; It is not necessary to define 'r' here. +(define_register_constraint "R" "LEGACY_REGS" + "Legacy register---the eight integer registers available on all + i386 processors (@code{a}, @code{b}, @code{c}, @code{d}, + @code{si}, @code{di}, @code{bp}, @code{sp}).") + +(define_register_constraint "q" "TARGET_64BIT ? GENERAL_REGS : Q_REGS" + "Any register accessible as @code{@var{r}l}. In 32-bit mode, @code{a}, + @code{b}, @code{c}, and @code{d}; in 64-bit mode, any integer register.") + +(define_register_constraint "Q" "Q_REGS" + "Any register accessible as @code{@var{r}h}: @code{a}, @code{b}, + @code{c}, and @code{d}.") + +(define_register_constraint "l" "INDEX_REGS" + "@internal Any register that can be used as the index in a base+index + memory access: that is, any general register except the stack pointer.") + +(define_register_constraint "a" "AREG" + "The @code{a} register.") + +(define_register_constraint "b" "BREG" + "The @code{b} register.") + +(define_register_constraint "c" "CREG" + "The @code{c} register.") + +(define_register_constraint "d" "DREG" + "The @code{d} register.") + +(define_register_constraint "S" "SIREG" + "The @code{si} register.") + +(define_register_constraint "D" "DIREG" + "The @code{di} register.") + +(define_register_constraint "A" "AD_REGS" + "The @code{a} and @code{d} registers, as a pair (for instructions + that return half the result in one and half in the other).") + +;; Floating-point register constraints. +(define_register_constraint "f" + "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FLOAT_REGS : NO_REGS" + "Any 80387 floating-point (stack) register.") + +(define_register_constraint "t" + "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_TOP_REG : NO_REGS" + "Top of 80387 floating-point stack (@code{%st(0)}).") + +(define_register_constraint "u" + "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS" + "Second from top of 80387 floating-point stack (@code{%st(1)}).") + +;; Vector registers (also used for plain floating point nowadays). +(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS" + "Any MMX register.") + +(define_register_constraint "x" "TARGET_SSE ? SSE_REGS : NO_REGS" + "Any SSE register.") + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; We use the Y prefix to denote any number of conditional register sets: +;; 0 First SSE register. +;; t SSE2 enabled +;; i SSE2 inter-unit moves enabled +;; m MMX inter-unit moves enabled + +(define_register_constraint "Y0" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS" + "First SSE register (@code{%xmm0}).") + +(define_register_constraint "Yt" "TARGET_SSE2 ? SSE_REGS : NO_REGS" + "@internal Any SSE register, when SSE2 is enabled.") + +(define_register_constraint "Yi" + "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS" + "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.") + +(define_register_constraint "Ym" + "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS" + "@internal Any MMX register, when inter-unit moves are enabled.") +;; APPLE LOCAL end 5612787 mainline sse4 + +;; Integer constant constraints. +(define_constraint "I" + "Integer constant in the range 0 @dots{} 31, for 32-bit shifts." 
+ (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 31"))) + +(define_constraint "J" + "Integer constant in the range 0 @dots{} 63, for 64-bit shifts." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 63"))) + +(define_constraint "K" + "Signed 8-bit integer constant." + (and (match_code "const_int") + (match_test "ival >= -128 && ival <= 127"))) + +(define_constraint "L" + "@code{0xFF} or @code{0xFFFF}, for andsi as a zero-extending move." + (and (match_code "const_int") + (match_test "ival == 0xFF || ival == 0xFFFF"))) + +(define_constraint "M" + "0, 1, 2, or 3 (shifts for the @code{lea} instruction)." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 3"))) + +(define_constraint "N" + "Unsigned 8-bit integer constant (for @code{in} and @code{out} + instructions)." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 255"))) + +(define_constraint "O" + "@internal Integer constant in the range 0 @dots{} 127, for 128-bit shifts." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 127"))) + +;; Floating-point constant constraints. +;; We allow constants even if TARGET_80387 isn't set, because the +;; stack register converter may need to load 0.0 into the function +;; value register (top of stack). +(define_constraint "G" + "Standard 80387 floating point constant." + (and (match_code "const_double") + (match_test "standard_80387_constant_p (op)"))) + +;; This can theoretically be any mode's CONST0_RTX. +(define_constraint "C" + "Standard SSE floating point constant." + (match_test "standard_sse_constant_p (op)")) + +;; Constant-or-symbol-reference constraints. + +(define_constraint "e" + "32-bit signed integer constant, or a symbolic reference known + to fit that range (for immediate operands in sign-extending x86-64 + instructions)." + (match_operand 0 "x86_64_immediate_operand")) + +(define_constraint "Z" + "32-bit unsigned integer constant, or a symbolic reference known + to fit that range (for immediate operands in zero-extending x86-64 + instructions)." 
+ (match_operand 0 "x86_64_zext_immediate_operand")) diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver new file mode 100644 index 000000000..aaeb934fe --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver @@ -0,0 +1,81 @@ +__Unwind_Backtrace +__Unwind_DeleteException +__Unwind_FindEnclosingFunction +__Unwind_Find_FDE +__Unwind_ForcedUnwind +__Unwind_GetCFA +__Unwind_GetDataRelBase +__Unwind_GetGR +__Unwind_GetIP +__Unwind_GetLanguageSpecificData +__Unwind_GetRegionStart +__Unwind_GetTextRelBase +__Unwind_RaiseException +__Unwind_Resume +__Unwind_Resume_or_Rethrow +__Unwind_SetGR +__Unwind_SetIP +___absvdi2 +___absvsi2 +___addvdi3 +___addvsi3 +___ashldi3 +___ashrdi3 +___clear_cache +___clzdi2 +___clzsi2 +___cmpdi2 +___ctzdi2 +___ctzsi2 +___deregister_frame +___deregister_frame_info +___deregister_frame_info_bases +___divdc3 +___divdi3 +___divsc3 +___divxc3 +___enable_execute_stack +___ffsdi2 +___fixdfdi +___fixsfdi +___fixunsdfdi +___fixunsdfsi +___fixunssfdi +___fixunssfsi +___fixunsxfdi +___fixunsxfsi +___fixxfdi +___floatdidf +___floatdisf +___floatdixf +___gcc_personality_v0 +___lshrdi3 +___moddi3 +___muldc3 +___muldi3 +___mulsc3 +___mulvdi3 +___mulvsi3 +___mulxc3 +___negdi2 +___negvdi2 +___negvsi2 +___paritydi2 +___paritysi2 +___popcountdi2 +___popcountsi2 +___powidf2 +___powisf2 +___powixf2 +___register_frame +___register_frame_info +___register_frame_info_bases +___register_frame_info_table +___register_frame_info_table_bases +___register_frame_table +___subvdi3 +___subvsi3 +___ucmpdi2 +___udivdi3 +___udivmoddi4 +___umoddi3 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver new file mode 100644 index 000000000..02a085843 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver @@ -0,0 +1,85 @@ +__Unwind_Backtrace +__Unwind_DeleteException +__Unwind_FindEnclosingFunction +__Unwind_Find_FDE +__Unwind_ForcedUnwind +__Unwind_GetCFA +__Unwind_GetDataRelBase +__Unwind_GetGR +__Unwind_GetIP +__Unwind_GetIPInfo +__Unwind_GetLanguageSpecificData +__Unwind_GetRegionStart +__Unwind_GetTextRelBase +__Unwind_RaiseException +__Unwind_Resume +__Unwind_Resume_or_Rethrow +__Unwind_SetGR +__Unwind_SetIP +___absvdi2 +___absvsi2 +___addvdi3 +___addvsi3 +___ashldi3 +___ashrdi3 +___clear_cache +___clzdi2 +___clzsi2 +___cmpdi2 +___ctzdi2 +___ctzsi2 +___deregister_frame +___deregister_frame_info +___deregister_frame_info_bases +___divdc3 +___divdi3 +___divsc3 +___divxc3 +___enable_execute_stack +___ffsdi2 +___fixdfdi +___fixsfdi +___fixunsdfdi +___fixunsdfsi +___fixunssfdi +___fixunssfsi +___fixunsxfdi +___fixunsxfsi +___fixxfdi +___floatdidf +___floatdisf +___floatdixf +___floatundidf +___floatundisf +___floatundixf +___gcc_personality_v0 +___lshrdi3 +___moddi3 +___muldc3 +___muldi3 +___mulsc3 +___mulvdi3 +___mulvsi3 +___mulxc3 +___negdi2 +___negvdi2 +___negvsi2 +___paritydi2 +___paritysi2 +___popcountdi2 +___popcountsi2 +___powidf2 +___powisf2 +___powixf2 +___register_frame +___register_frame_info +___register_frame_info_bases +___register_frame_info_table +___register_frame_info_table_bases +___register_frame_table +___subvdi3 +___subvsi3 +___ucmpdi2 +___udivdi3 +___udivmoddi4 +___umoddi3 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin.h b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.h new file mode 100644 index 000000000..d0b1db12b --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.h @@ -0,0 +1,452 @@ 
+/* Target definitions for x86 running Darwin. + Copyright (C) 2001, 2002, 2004, 2005 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* Enable Mach-O bits in generic x86 code. */ +#undef TARGET_MACHO +#define TARGET_MACHO 1 + +/* APPLE LOCAL begin mainline */ +#undef TARGET_64BIT +/* APPLE LOCAL begin 5612787 mainline sse4 */ +#define TARGET_64BIT (target_flags & MASK_64BIT) +/* APPLE LOCAL end 5612787 mainline sse4 */ + +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __x86_64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif + +#define TARGET_VERSION fprintf (stderr, " (i686 Darwin)"); +/* APPLE LOCAL end mainline */ + +#undef TARGET_64BIT +#define TARGET_64BIT (target_flags & MASK_64BIT) + +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __x86_64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif + +/* Size of the Obj-C jump buffer. */ +#define OBJC_JBLEN ((TARGET_64BIT) ? ((9 * 2) + 3 + 16) : (18)) + +#undef TARGET_FPMATH_DEFAULT +#define TARGET_FPMATH_DEFAULT (TARGET_SSE ? FPMATH_SSE : FPMATH_387) + +/* APPLE LOCAL begin mainline */ +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") + +#undef MAX_BITS_PER_WORD +#define MAX_BITS_PER_WORD 64 + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__LITTLE_ENDIAN__"); \ + darwin_cpp_builtins (pfile); \ + } \ + while (0) +/* APPLE LOCAL end mainline */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef MAX_BITS_PER_WORD +#define MAX_BITS_PER_WORD 64 + +#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN +#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0) + +/* We want -fPIC by default, unless we're using -static to compile for + the kernel or some such. 
*/ + +#undef CC1_SPEC +#define CC1_SPEC "%{!mkernel:%{!static:%{!mdynamic-no-pic:-fPIC}}} \ + "/* APPLE LOCAL ARM ignore -mthumb and -mno-thumb */"\ + %<mthumb %<mno-thumb \ + "/* APPLE LOCAL ARM 5683689 */"\ + %{!mmacosx-version-min=*: %{!miphoneos-version-min=*: %(darwin_cc1_minversion)}} \ + "/* APPLE LOCAL ignore -mcpu=G4 -mcpu=G5 */"\ + %<faltivec %<mno-fused-madd %<mlong-branch %<mlongcall %<mcpu=G4 %<mcpu=G5 \ + %{g: %{!fno-eliminate-unused-debug-symbols: -feliminate-unused-debug-symbols }}" + +/* APPLE LOCAL AltiVec */ +#define CPP_ALTIVEC_SPEC "%<faltivec" + +/* APPLE LOCAL begin mainline */ +#undef ASM_SPEC +/* APPLE LOCAL begin kext weak_import 5935650 */ +#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \ + %{mkernel|static|fapple-kext:%{!m64:-static}}" +/* APPLE LOCAL end kext weak_import 5935650 */ + +#define DARWIN_ARCH_SPEC "%{m64:x86_64;:i386}" +#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC + +/* APPLE LOCAL begin mainline 2007-03-13 5005743 5040758 */ \ +/* Determine a minimum version based on compiler options. */ +#define DARWIN_MINVERSION_SPEC \ + "%{!m64|fgnu-runtime:10.4; \ + ,objective-c|,objc-cpp-output:10.5; \ + ,objective-c-header:10.5; \ + ,objective-c++|,objective-c++-cpp-output:10.5; \ + ,objective-c++-header|,objc++-cpp-output:10.5; \ + :10.4}" + +/* APPLE LOCAL end mainline 2007-03-13 5005743 5040758 */ \ +/* APPLE LOCAL begin ARM 5683689 */ +/* Default cc1 option for specifying minimum version number. */ +#define DARWIN_CC1_MINVERSION_SPEC "-mmacosx-version-min=%(darwin_minversion)" + +/* Default ld option for specifying minimum version number. */ +#define DARWIN_LD_MINVERSION_SPEC "-macosx_version_min %(darwin_minversion)" + +/* Use macosx version numbers by default. */ +#define DARWIN_DEFAULT_VERSION_TYPE DARWIN_VERSION_MACOSX +/* APPLE LOCAL end ARM 5683689 */ + +/* APPLE LOCAL ARM 5681645 8307333 */ +#define DARWIN_IPHONEOS_LIBGCC_SPEC "-lgcc" + +/* APPLE LOCAL begin link optimizations 6499452 */ +#undef DARWIN_CRT1_SPEC +#define DARWIN_CRT1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lcrt1.10.5.o) \ + %:version-compare(>= 10.6 mmacosx-version-min= -lcrt1.10.6.o)" + +#undef DARWIN_DYLIB1_SPEC +#define DARWIN_DYLIB1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -ldylib1.10.5.o)" + +#undef DARWIN_BUNDLE1_SPEC +#define DARWIN_BUNDLE1_SPEC \ + "%:version-compare(!> 10.6 mmacosx-version-min= -lbundle1.o)" +/* APPLE LOCAL end link optimizations 6499452 */ + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + DARWIN_EXTRA_SPECS \ + { "darwin_arch", DARWIN_ARCH_SPEC }, \ + { "darwin_crt2", "" }, \ + { "darwin_subarch", DARWIN_SUBARCH_SPEC }, +/* APPLE LOCAL end mainline */ + +/* APPLE LOCAL begin prefer -lSystem 6645902 */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{miphoneos-version-min=*: %G %L} \ + %{!miphoneos-version-min=*: \ + %{!static:%:version-compare(>= 10.6 mmacosx-version-min= -lSystem)} %G %L}" +/* APPLE LOCAL end prefer -lSystem 6645902 */ + +/* Use the following macro for any Darwin/x86-specific command-line option + translation. */ +#define SUBTARGET_OPTION_TRANSLATE_TABLE \ + { "", "" } + +/* The Darwin assembler mostly follows AT&T syntax. */ +#undef ASSEMBLER_DIALECT +#define ASSEMBLER_DIALECT ASM_ATT + +/* Define macro used to output shift-double opcodes when the shift + count is in %cl. 
Some assemblers require %cl as an argument; + some don't. This macro controls what to do: by default, don't + print %cl. */ + +#define SHIFT_DOUBLE_OMITS_COUNT 0 + +extern void darwin_x86_file_end (void); +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END darwin_x86_file_end + +/* Define the syntax of pseudo-ops, labels and comments. */ + +/* String containing the assembler's comment-starter. */ + +#define ASM_COMMENT_START "#" + +/* By default, target has a 80387, uses IEEE compatible arithmetic, + and returns float values in the 387. */ + +#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE) +/* APPLE LOCAL begin mainline */ +/* For darwin we want to target specific processor features as a minimum, + but these unfortunately don't correspond to a specific processor. */ +#undef TARGET_SUBTARGET32_DEFAULT +#define TARGET_SUBTARGET32_DEFAULT (MASK_MMX \ + | MASK_SSE \ + | MASK_SSE2) + +#undef TARGET_SUBTARGET64_DEFAULT +#define TARGET_SUBTARGET64_DEFAULT (MASK_MMX \ + | MASK_SSE \ + | MASK_SSE2 \ + | MASK_SSE3) +/* APPLE LOCAL end mainline */ +/* APPLE LOCAL mdynamic-no-pic */ +/* Remove disabling of mdynamic-no-pic */ + +#undef GOT_SYMBOL_NAME +#define GOT_SYMBOL_NAME (machopic_function_base_name ()) + +/* Define the syntax of pseudo-ops, labels and comments. */ + +#define LPREFIX "L" + +/* These are used by -fbranch-probabilities */ +#define HOT_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions" +#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME \ + "__TEXT,__unlikely,regular,pure_instructions" + +/* Assembler pseudos to introduce constants of various size. */ + +#define ASM_BYTE_OP "\t.byte\t" +#define ASM_SHORT "\t.word\t" +#define ASM_LONG "\t.long\t" +#define ASM_QUAD "\t.quad\t" + +#define SUBTARGET_ENCODE_SECTION_INFO darwin_encode_section_info + +#undef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { if ((LOG) != 0) \ + { \ + if (in_section == text_section) \ + fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \ + else \ + fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \ + } \ + } while (0) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + /* APPLE LOCAL begin mainline */ \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (ROUNDED))) + /* APPLE LOCAL end mainline */ + +/* This says how to output an assembler line + to define a local common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (ROUNDED))) + + +/* APPLE LOCAL begin Macintosh alignment 2002-2-19 --ff */ +#if 0 +#define MASK_ALIGN_NATURAL 0x40000000 +#define TARGET_ALIGN_NATURAL (target_flags & MASK_ALIGN_NATURAL) +#define MASK_ALIGN_MAC68K 0x20000000 +#define TARGET_ALIGN_MAC68K (target_flags & MASK_ALIGN_MAC68K) +#endif +#define rs6000_alignment_flags target_flags + +#define ROUND_TYPE_ALIGN(TYPE, COMPUTED, SPECIFIED) \ + (((TREE_CODE (TYPE) == RECORD_TYPE \ + || TREE_CODE (TYPE) == UNION_TYPE \ + || TREE_CODE (TYPE) == QUAL_UNION_TYPE) \ + && OPTION_ALIGN_MAC68K \ + && MAX (COMPUTED, SPECIFIED) == 8) ? 16 \ + : MAX (COMPUTED, SPECIFIED)) +/* APPLE LOCAL end Macintosh alignment 2002-2-19 --ff */ + +/* Darwin profiling -- call mcount. 
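+   When Mach-O stubs are in use (32-bit indirect code), the call must go
+   through the lazy symbol stub named by machopic_mcount_stub_name rather
+   than straight to mcount; otherwise a direct call is emitted.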
*/
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO)				\
+  do {									\
+    /* APPLE LOCAL axe stubs 5571540 */				\
+    if (darwin_stubs && MACHOPIC_INDIRECT && !TARGET_64BIT)		\
+      {									\
+	const char *name = machopic_mcount_stub_name ();		\
+	fprintf (FILE, "\tcall %s\n", name+1);  /*  skip '&'  */	\
+	machopic_validate_stub_or_non_lazy_ptr (name);			\
+      }									\
+    else fprintf (FILE, "\tcall mcount\n");				\
+  } while (0)
+
+/* APPLE LOCAL CW asm blocks */
+extern int flag_iasm_blocks;
+/* APPLE LOCAL begin fix-and-continue x86 */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS					\
+  do {									\
+    /* APPLE LOCAL begin ARM 5683689 */				\
+    if (!darwin_macosx_version_min					\
+	&& !darwin_iphoneos_version_min)				\
+      darwin_macosx_version_min = "10.1";				\
+    /* APPLE LOCAL end ARM 5683689 */					\
+    /* APPLE LOCAL begin CW asm blocks */				\
+    if (flag_iasm_blocks)						\
+      flag_ms_asms = 1;							\
+    /* APPLE LOCAL end CW asm blocks */				\
+    /* APPLE LOCAL begin constant cfstrings */				\
+    if (darwin_constant_cfstrings < 0)					\
+      darwin_constant_cfstrings = 1;					\
+    /* APPLE LOCAL end constant cfstrings */				\
+    if (TARGET_64BIT)							\
+      {									\
+	if (MACHO_DYNAMIC_NO_PIC_P)					\
+	  target_flags &= ~MASK_MACHO_DYNAMIC_NO_PIC;			\
+      }									\
+    /* APPLE LOCAL begin fix this for mainline */			\
+    /* For mainline this needs to be fixed to have every		\
+       cpu architecture feature as an isa mask.  Every			\
+       cpu we've shipped supports all of these features.		\
+       This includes all ix86_arch cpu features currently		\
+       defined except x86_cmove which is turned on for			\
+       TARGET_SSE anyhow.  */						\
+    if (!ix86_arch_string)						\
+      {									\
+	x86_cmpxchg = ~(0);						\
+	x86_cmpxchg8b = ~(0);						\
+	x86_cmpxchg16b = ~(0);						\
+	x86_xadd = ~(0);						\
+	x86_bswap = ~(0);						\
+      }									\
+    /* APPLE LOCAL end fix this for mainline */			\
+  } while (0)
+
+/* True iff we're generating fast-turnaround debugging code.  When
+   true, we arrange for function prologues to start with 6 nops so
+   that gdb may insert code to redirect them, and for data to be
+   accessed indirectly.  The runtime uses this indirection to forward
+   references for data to the original instance of that data.  */
+
+#define TARGET_FIX_AND_CONTINUE (darwin_fix_and_continue)
+/* APPLE LOCAL end fix-and-continue x86 */
+
+#define C_COMMON_OVERRIDE_OPTIONS		\
+  do {						\
+    SUBTARGET_C_COMMON_OVERRIDE_OPTIONS;	\
+  } while (0)
+
+/* APPLE LOCAL begin mainline 4.3 2006-10-31 4370143 */
+/* Removed PREFERRED_DEBUGGING_TYPE */
+/* APPLE LOCAL end mainline 4.3 2006-10-31 4370143 */
+
+/* Darwin uses the standard DWARF register numbers but the default
+   register numbers for STABS.  Fortunately for 64-bit code the
+   default and the standard are the same.  */
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n)					\
+  (TARGET_64BIT ? dbx64_register_map[n]				\
+   : write_symbols == DWARF2_DEBUG ? svr4_dbx_register_map[n]	\
+   : dbx_register_map[n])
+
+/* Unfortunately, the 32-bit EH information also doesn't use the standard
+   DWARF register numbers.  */
+#define DWARF2_FRAME_REG_OUT(n, for_eh)					\
+  (! (for_eh) || write_symbols != DWARF2_DEBUG || TARGET_64BIT ? (n)	\
+   : (n) == 5 ? 4							\
+   : (n) == 4 ? 5							\
+   : (n) >= 11 && (n) <= 18 ? (n) + 1					\
+   : (n))
+
+/* APPLE LOCAL begin 4457939 stack alignment mishandled */
+/* <rdar://problem/4471596> stack alignment is not handled properly
+
+   Please remove this entire apple local when addressing this
+   Radar.
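+
+   (Until then, ix86_darwin_init_expanders below runs once per function
+   via INIT_EXPANDERS; presumably it adjusts the expanders' notion of the
+   incoming stack boundary -- see the APPLE LOCAL code in i386.c.)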
*/
+extern void ix86_darwin_init_expanders (void);
+#define INIT_EXPANDERS (ix86_darwin_init_expanders ())
+/* APPLE LOCAL end 4457939 stack alignment mishandled */
+
+
+/* APPLE LOCAL begin CW asm blocks */
+#define IASM_VALID_PIC(DECL, E)						\
+  do {									\
+    if (! TARGET_64BIT							\
+	&& E->as_immediate && ! MACHO_DYNAMIC_NO_PIC_P && flag_pic)	\
+      warning (0, "non-pic addressing form not suitable for pic code");	\
+  } while (0)
+#define IASM_RIP(X) do { if (TARGET_64BIT) strcat (X, "(%%rip)"); } while (0)
+/* APPLE LOCAL end CW asm blocks */
+
+/* APPLE LOCAL KEXT */
+#define TARGET_SUPPORTS_KEXTABI1 (! TARGET_64BIT)
+
+
+#undef REGISTER_TARGET_PRAGMAS
+#define REGISTER_TARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS()
+
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes
+
+/* APPLE LOCAL begin mainline */
+/* For 64-bit, we need to add 4 because @GOTPCREL is relative to the
+   end of the instruction, but without the 4 we'd only have the right
+   address for the start of the instruction.  */
+#undef ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE)	\
+  if (TARGET_64BIT)							\
+    {									\
+      if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_pcrel)		\
+	{								\
+	  fputs (ASM_LONG, FILE);					\
+	  assemble_name (FILE, XSTR (ADDR, 0));				\
+	  fputs ("+4@GOTPCREL", FILE);					\
+	  goto DONE;							\
+	}								\
+    }									\
+  else									\
+    {									\
+      if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1))		\
+	{								\
+	  darwin_non_lazy_pcrel (FILE, ADDR);				\
+	  goto DONE;							\
+	}								\
+    }
+/* APPLE LOCAL end mainline */
+/* APPLE LOCAL begin track initialization status 4964532 */
+/* APPLE LOCAL begin ARM 5683689 */
+#undef TARGET_DWARF_UNINIT_VARS
+#define TARGET_DWARF_UNINIT_VARS				\
+  (darwin_iphoneos_version_min ||				\
+   strverscmp (darwin_macosx_version_min, "10.4") >= 0)
+/* APPLE LOCAL end ARM 5683689 */
+/* APPLE LOCAL end track initialization status 4964532 */
+
+/* This needs to move since i386 uses the first flag and other flags are
+   used in Mach-O.  */
+#undef MACHO_SYMBOL_FLAG_VARIABLE
+#define MACHO_SYMBOL_FLAG_VARIABLE ((SYMBOL_FLAG_MACH_DEP) << 3)
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt
new file mode 100644
index 000000000..90854e1ca
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt
@@ -0,0 +1,15 @@
+; APPLE LOCAL begin Macintosh alignment 2002-2-19 --ff
+malign-mac68k
+Target Report Mask(ALIGN_MAC68K) Var(darwin_alignment_flags)
+Align structs and unions according to mac68k rules
+
+malign-natural
+Target Report Mask(ALIGN_NATURAL) Var(darwin_alignment_flags)
+Align structs and unions according to natural rules
+
+; Maybe we don't need this.
+;malign-power
+;; I want this to clear MASK_ALIGN_MAC68K | MASK_ALIGN_NATURAL
+;Target Undocumented InverseMask(ALIGN_MAC68K)
+;Align structs and unions according to PowerPC rules
+; APPLE LOCAL end Macintosh alignment 2002-2-19 --ff
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h b/gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h
new file mode 100644
index 000000000..e630a7064
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h
@@ -0,0 +1,43 @@
+/* Target definitions for x86_64 running Darwin.
+   Copyright (C) 2006 Free Software Foundation, Inc.
+   Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (x86_64 Darwin)"); + +#undef DARWIN_ARCH_SPEC +#define DARWIN_ARCH_SPEC "%{m32:i386;:x86_64}" + +#undef DARWIN_SUBARCH_SPEC +#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC + +/* APPLE LOCAL begin kext 6400713 */ +#undef ASM_SPEC +#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \ + %{mkernel|static|fapple-kext:%{m32:-static}}" +/* APPLE LOCAL end kext 6400713 */ + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + /* APPLE LOCAL 6015949 */ \ + DARWIN_EXTRA_SPECS \ + { "darwin_arch", DARWIN_ARCH_SPEC }, \ + { "darwin_crt2", "" }, \ + { "darwin_subarch", DARWIN_SUBARCH_SPEC }, diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c b/gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c new file mode 100644 index 000000000..ffcee4e55 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c @@ -0,0 +1,300 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2006 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include <stdlib.h> + +const char *host_detect_local_cpu (int argc, const char **argv); + +#ifdef GCC_VERSION +#define cpuid(num,a,b,c,d) \ + asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (num)) + +#define bit_CMPXCHG8B (1 << 8) +#define bit_CMOV (1 << 15) +#define bit_MMX (1 << 23) +#define bit_SSE (1 << 25) +#define bit_SSE2 (1 << 26) + +#define bit_SSE3 (1 << 0) +#define bit_CMPXCHG16B (1 << 13) + +#define bit_3DNOW (1 << 31) +#define bit_3DNOWP (1 << 30) +#define bit_LM (1 << 29) + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "arch" or "tune" as argument depending on if -march=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-march=k8" on an AMD64 machine + for -march=native. 
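+
+   As an illustration (a sketch, not text from this file): later FSF
+   releases wire this hook into the driver with a spec fragment along
+   the lines of
+
+       %{march=native:%<march=native %:local_cpu_detect(arch)}
+
+   which deletes -march=native from the command line and splices in the
+   detected -march value returned here.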
+ + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char *host_detect_local_cpu (int argc, const char **argv) +{ + const char *cpu = NULL; + enum processor_type processor = PROCESSOR_I386; + unsigned int eax, ebx, ecx, edx; + unsigned int max_level; + unsigned int vendor; + unsigned int ext_level; + unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0; + unsigned char has_sse2 = 0, has_sse3 = 0, has_cmov = 0; + unsigned char has_longmode = 0, has_cmpxchg8b = 0; + unsigned char is_amd = 0; + unsigned int family = 0; + bool arch; + + if (argc < 1) + return NULL; + + arch = strcmp (argv[0], "arch") == 0; + if (!arch && strcmp (argv[0], "tune")) + return NULL; + +#ifndef __x86_64__ + /* See if we can use cpuid. */ + asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;" + "pushl %0; popfl; pushfl; popl %0; popfl" + : "=&r" (eax), "=&r" (ebx) + : "i" (0x00200000)); + + if (((eax ^ ebx) & 0x00200000) == 0) + goto done; +#endif + + processor = PROCESSOR_PENTIUM; + + /* Check the highest input value for eax. */ + cpuid (0, eax, ebx, ecx, edx); + max_level = eax; + /* We only look at the first four characters. */ + vendor = ebx; + if (max_level == 0) + goto done; + + cpuid (1, eax, ebx, ecx, edx); + has_cmpxchg8b = !!(edx & bit_CMPXCHG8B); + has_cmov = !!(edx & bit_CMOV); + has_mmx = !!(edx & bit_MMX); + has_sse = !!(edx & bit_SSE); + has_sse2 = !!(edx & bit_SSE2); + has_sse3 = !!(ecx & bit_SSE3); + /* We don't care for extended family. */ + family = (eax >> 8) & ~(1 << 4); + + cpuid (0x80000000, eax, ebx, ecx, edx); + ext_level = eax; + if (ext_level >= 0x80000000) + { + cpuid (0x80000001, eax, ebx, ecx, edx); + has_3dnow = !!(edx & bit_3DNOW); + has_3dnowp = !!(edx & bit_3DNOWP); + has_longmode = !!(edx & bit_LM); + } + + is_amd = vendor == *(unsigned int*)"Auth"; + + if (is_amd) + { + if (has_mmx) + processor = PROCESSOR_K6; + if (has_3dnowp) + processor = PROCESSOR_ATHLON; + if (has_sse2 || has_longmode) + processor = PROCESSOR_K8; + } + else + { + switch (family) + { + case 5: + /* Default is PROCESSOR_PENTIUM. */ + break; + case 6: + processor = PROCESSOR_PENTIUMPRO; + break; + case 15: + processor = PROCESSOR_PENTIUM4; + break; + default: + /* We have no idea. Use something reasonable. */ + if (arch) + { + if (has_sse3) + { + if (has_longmode) + cpu = "nocona"; + else + cpu = "prescott"; + } + else if (has_sse2) + cpu = "pentium4"; + else if (has_cmov) + cpu = "pentiumpro"; + else if (has_mmx) + cpu = "pentium-mmx"; + else if (has_cmpxchg8b) + cpu = "pentium"; + else + cpu = "i386"; + } + else + cpu = "generic"; + goto done; + break; + } + } + + switch (processor) + { + case PROCESSOR_I386: + cpu = "i386"; + break; + case PROCESSOR_I486: + cpu = "i486"; + break; + case PROCESSOR_PENTIUM: + if (has_mmx) + cpu = "pentium-mmx"; + else + cpu = "pentium"; + break; + case PROCESSOR_PENTIUMPRO: + if (arch) + { + if (has_sse3) + { + if (has_longmode) + { + /* It is Core 2 Duo. */ + cpu = "nocona"; + } + else + { + /* It is Core Duo. */ + cpu = "prescott"; + } + } + else if (has_sse2) + { + /* It is Pentium M. */ + cpu = "pentium4"; + } + else if (has_sse) + { + /* It is Pentium III. */ + cpu = "pentium3"; + } + else if (has_mmx) + { + /* It is Pentium II. */ + cpu = "pentium2"; + } + else + { + /* Default to Pentium Pro. */ + cpu = "pentiumpro"; + } + } + else + { + /* For -mtune, we default to -mtune=generic. 
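+	       Tuning for one specific core buys little when the real
+	       target is unknown, and "generic" scheduling is safe on
+	       every part we support.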
*/ + cpu = "generic"; + } + break; + case PROCESSOR_K6: + if (has_3dnow) + cpu = "k6-3"; + else + cpu = "k6"; + break; + case PROCESSOR_ATHLON: + if (has_sse) + cpu = "athlon-4"; + else + cpu = "athlon"; + break; + case PROCESSOR_PENTIUM4: + if (has_sse3) + { + if (has_longmode) + cpu = "nocona"; + else + cpu = "prescott"; + } + else + cpu = "pentium4"; + break; + case PROCESSOR_K8: + cpu = "k8"; + break; + case PROCESSOR_NOCONA: + cpu = "nocona"; + break; + case PROCESSOR_GENERIC32: + case PROCESSOR_GENERIC64: + cpu = "generic"; + break; + default: + abort (); + break; + } + +done: + return concat ("-m", argv[0], "=", cpu, NULL); +} +#else +/* If we aren't compiling with GCC we just provide a minimal + default value. */ +const char *host_detect_local_cpu (int argc, const char **argv) +{ + const char *cpu; + bool arch; + + if (argc < 1) + return NULL; + + arch = strcmp (argv[0], "arch") == 0; + if (!arch && strcmp (argv[0], "tune")) + return NULL; + + if (arch) + { + /* FIXME: i386 is wrong for 64bit compiler. How can we tell if + we are generating 64bit or 32bit code? */ + cpu = "i386"; + } + else + cpu = "generic"; + + return concat ("-m", argv[0], "=", cpu, NULL); +} +#endif /* GCC_VERSION */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h new file mode 100644 index 000000000..857ea6ff9 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h @@ -0,0 +1,1981 @@ +/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */ +/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef _EMMINTRIN_H_INCLUDED +#define _EMMINTRIN_H_INCLUDED + +#ifdef __SSE2__ +#include <xmmintrin.h> + +/* SSE2 */ +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); +typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef short __v8hi __attribute__ ((__vector_size__ (16))); +typedef char __v16qi __attribute__ ((__vector_size__ (16))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. 
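+
+   As a sketch (illustrative only, not part of this header), __may_alias__
+   is what makes a reinterpreting round-trip such as
+
+     __m128i __v = _mm_set1_epi32 (7);
+     int __lane0 = __builtin_ia32_vec_ext_v4si ((__v4si)__v, 0);
+
+   well-defined, even though the same bytes are inspected through two
+   different vector types.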
*/ +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* Create a selector for use with the SHUFPD instruction. */ +#define _MM_SHUFFLE2(fp1,fp0) \ + (((fp1) << 1) | (fp0)) + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#define __always_inline__ __always_inline__, __nodebug__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +/* APPLE LOCAL begin radar 5618945 */ +#undef __STATIC_INLINE +#ifdef __GNUC_STDC_INLINE__ +#define __STATIC_INLINE __inline +#else +#define __STATIC_INLINE static __inline +#endif +/* APPLE LOCAL end radar 5618945 */ + +/* APPLE LOCAL begin radar 4152603 */ +/* Create a vector with element 0 as F and the rest zero. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_sd (double __F) +{ + return __extension__ (__m128d){ __F, 0 }; +} + +/* Create a vector with both elements equal to F. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_pd (double __F) +{ + return __extension__ (__m128d){ __F, __F }; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_pd1 (double __F) +{ + return _mm_set1_pd (__F); +} + +/* Create a vector with the lower value X and upper value W. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_pd (double __W, double __X) +{ + return __extension__ (__m128d){ __X, __W }; +} + +/* Create a vector with the lower value W and upper value X. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_pd (double __W, double __X) +{ + return __extension__ (__m128d){ __W, __X }; +} + +/* Create a vector of zeros. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setzero_pd (void) +{ + return __extension__ (__m128d){ 0.0, 0.0 }; +} + +/* Sets the low DPFP value of A from the low value of B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_move_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); +} + +/* Load two DPFP values from P. The address must be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_pd (double const *__P) +{ + return *(__m128d *)__P; +} + +/* Load two DPFP values from P. The address need not be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadu_pd (double const *__P) +{ + return __builtin_ia32_loadupd (__P); +} + +/* Create a vector with all two elements equal to *P. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load1_pd (double const *__P) +{ + return _mm_set1_pd (*__P); +} + +/* Create a vector with element 0 as *P and the rest zero. 
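+   That is, _mm_load_sd (&__x) yields { __x, 0.0 }; only the low element
+   comes from memory, exactly as if by _mm_set_sd (*__P).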
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_sd (double const *__P) +{ + return _mm_set_sd (*__P); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_pd1 (double const *__P) +{ + return _mm_load1_pd (__P); +} + +/* Load two DPFP values in reverse order. The address must be aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadr_pd (double const *__P) +{ + __m128d __tmp = _mm_load_pd (__P); + return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); +} + +/* Store two DPFP values. The address must be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_pd (double *__P, __m128d __A) +{ + *(__m128d *)__P = __A; +} + +/* Store two DPFP values. The address need not be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storeu_pd (double *__P, __m128d __A) +{ + __builtin_ia32_storeupd (__P, __A); +} + +/* Stores the lower DPFP value. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_sd (double *__P, __m128d __A) +{ + *__P = __builtin_ia32_vec_ext_v2df (__A, 0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE double __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsd_f64 (__m128d __A) +{ + return __builtin_ia32_vec_ext_v2df (__A, 0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storel_pd (double *__P, __m128d __A) +{ + _mm_store_sd (__P, __A); +} + +/* Stores the upper DPFP value. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storeh_pd (double *__P, __m128d __A) +{ + *__P = __builtin_ia32_vec_ext_v2df (__A, 1); +} + +/* Store the lower DPFP value across two words. + The address must be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store1_pd (double *__P, __m128d __A) +{ + _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_pd1 (double *__P, __m128d __A) +{ + _mm_store1_pd (__P, __A); +} + +/* Store two DPFP values in reverse order. The address must be aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storer_pd (double *__P, __m128d __A) +{ + _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi128_si32 (__m128i __A) +{ + return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); +} + +#ifdef __x86_64__ +/* Intel intrinsic. 
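+   Extracts the low 64-bit element of the vector; the Microsoft-named
+   _mm_cvtsi128_si64x below performs the identical operation.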
*/
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi128_si64 (__m128i __A)
+{
+  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+
+/* Microsoft intrinsic.  */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi128_si64x (__m128i __A)
+{
+  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_div_pd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_div_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sqrt_pd (__m128d __A)
+{
+  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+}
+
+/* Return pair {sqrt (B[0]), A[1]}.
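+   Only the low element is replaced by a square root; the high element is
+   passed through from A, matching the movsd/sqrtsd pair in the body.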
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sqrt_sd (__m128d __A, __m128d __B) +{ + __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); + return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_and_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_andnot_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_or_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_xor_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmple_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpge_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpgepd 
((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpneq_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnlt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnle_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpngt_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnge_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpord_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpunord_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmple_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpltsd ((__v2df) __B, + (__v2df) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpge_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmplesd ((__v2df) __B, + (__v2df) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpneq_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d 
__attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnlt_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnle_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpngt_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpnltsd ((__v2df) __B, + (__v2df) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnge_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_movsd ((__v2df) __A, + (__v2df) + __builtin_ia32_cmpnlesd ((__v2df) __B, + (__v2df) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpord_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpunord_sd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comieq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comilt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comile_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comigt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comige_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comineq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomieq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomilt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); +} + +/* APPLE 
LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomile_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomigt_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomige_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomineq_sd (__m128d __A, __m128d __B) +{ + return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); +} + +/* Create a vector of Qi, where i is the element number. */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_epi64x (long long __q1, long long __q0) +{ + return __extension__ (__m128i)(__v2di){ __q0, __q1 }; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_epi64 (__m64 __q1, __m64 __q0) +{ + return _mm_set_epi64x ((long long)__q1, (long long)__q0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) +{ + return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, + short __q3, short __q2, short __q1, short __q0) +{ + return __extension__ (__m128i)(__v8hi){ + __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, + char __q11, char __q10, char __q09, char __q08, + char __q07, char __q06, char __q05, char __q04, + char __q03, char __q02, char __q01, char __q00) +{ + return __extension__ (__m128i)(__v16qi){ + __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, + __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 + }; +} + +/* APPLE LOCAL begin 4220129 */ +/* functions moved to end of file */ +/* APPLE LOCAL end 4220129 */ + +/* Create a vector of Qi, where i is the element number. + The parameter order is reversed from the _mm_set_epi* functions. 
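+   For example, _mm_setr_epi32 (0, 1, 2, 3) and _mm_set_epi32 (3, 2, 1, 0)
+   construct the same vector, with 0 in the lowest-numbered element.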
*/ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_epi64 (__m64 __q0, __m64 __q1) +{ + return _mm_set_epi64 (__q1, __q0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) +{ + return _mm_set_epi32 (__q3, __q2, __q1, __q0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, + short __q4, short __q5, short __q6, short __q7) +{ + return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, + char __q04, char __q05, char __q06, char __q07, + char __q08, char __q09, char __q10, char __q11, + char __q12, char __q13, char __q14, char __q15) +{ + return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, + __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); +} + +/* Create a vector with element 0 as *P and the rest zero. */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_si128 (__m128i const *__P) +{ + return *__P; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); +} + +/* APPLE LOCAL begin 4099020 */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadl_epi64 (__m128i const *__P) +{ + return (__m128i)__builtin_ia32_loadlv4si ((__v2si *)__P); +} +/* APPLE LOCAL end 4099020 */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_si128 (__m128i *__P, __m128i __B) +{ + *__P = __B; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storeu_si128 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); +} + +/* APPLE LOCAL begin 4099020 */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storel_epi64 (__m128i *__P, __m128i __B) +{ + __builtin_ia32_storelv4si ((__v2si *)__P, __B); +} +/* APPLE LOCAL end 4099020 */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movepi64_pi64 (__m128i __B) +{ + return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movpi64_epi64 (__m64 __A) +{ + return _mm_set_epi64 ((__m64)0LL, __A); +} + +/* APPLE LOCAL begin 4099020 */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_move_epi64 (__m128i __A) +{ + return 
(__m128i)__builtin_ia32_movqv4si ((__v4si)__A) ; +} +/* APPLE LOCAL end 4099020 */ + +/* Create a vector of zeros. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setzero_si128 (void) +{ + return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtepi32_pd (__m128i __A) +{ + return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtepi32_ps (__m128i __A) +{ + return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpd_epi32 (__m128d __A) +{ + return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpd_pi32 (__m128d __A) +{ + return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpd_ps (__m128d __A) +{ + return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttpd_epi32 (__m128d __A) +{ + return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttpd_pi32 (__m128d __A) +{ + return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpi32_pd (__m64 __A) +{ + return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtps_epi32 (__m128 __A) +{ + return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttps_epi32 (__m128 __A) +{ + return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtps_pd (__m128 __A) +{ + return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsd_si32 (__m128d __A) +{ + return __builtin_ia32_cvtsd2si ((__v2df) __A); +} + +#ifdef __x86_64__ +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsd_si64 (__m128d __A) +{ + return __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} + +/* Microsoft intrinsic. 
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} +#endif + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttsd_si32 (__m128d __A) +{ + return __builtin_ia32_cvttsd2si ((__v2df) __A); +} + +#ifdef __x86_64__ +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttsd_si64 (__m128d __A) +{ + return __builtin_ia32_cvttsd2si64 ((__v2df) __A); +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvttsd2si64 ((__v2df) __A); +} +#endif + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsd_ss (__m128 __A, __m128d __B) +{ + return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi32_sd (__m128d __A, int __B) +{ + return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); +} + +#ifdef __x86_64__ +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64_sd (__m128d __A, long long __B) +{ + return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} + +/* Microsoft intrinsic. 
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64x_sd (__m128d __A, long long __B) +{ + return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} +#endif + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtss_sd (__m128d __A, __m128 __B) +{ + return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); +} + +/* APPLE LOCAL 5814283 */ +#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)(__A), (__v2df)(__B), (__C))) + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_pd (__m128d __A, __m128d __B) +{ + return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadh_pd (__m128d __A, double const *__B) +{ + return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadl_pd (__m128d __A, double const *__B) +{ + return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movemask_pd (__m128d __A) +{ + return __builtin_ia32_movmskpd ((__v2df)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_packs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_packs_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_packus_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i 
__attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubb128 
((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_madd_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mulhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mullo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mul_su32 (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mul_epu32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); +} + +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_slli_epi16 (__m128i __A, int __B) +{ + return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_slli_epi32 (__m128i __A, int __B) +{ + 
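/* Like the PSLLD instruction itself, shift counts greater than 31 clear every element rather than wrapping. */ +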
return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+}
+#else
+#define _mm_slli_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_psllwi128 ((__v8hi)(__A), __B))
+#define _mm_slli_epi32(__A, __B) \
+ ((__m128i)__builtin_ia32_pslldi128 ((__v4si)(__A), __B))
+#define _mm_slli_epi64(__A, __B) \
+ ((__m128i)__builtin_ia32_psllqi128 ((__v2di)(__A), __B))
+#endif
+
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srai_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srai_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+}
+#else
+#define _mm_srai_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_psrawi128 ((__v8hi)(__A), __B))
+#define _mm_srai_epi32(__A, __B) \
+ ((__m128i)__builtin_ia32_psradi128 ((__v4si)(__A), __B))
+#endif
+
+#if 0
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, int __B)
+{
+ return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8));
+}
+
+static __m128i __attribute__((__always_inline__))
+_mm_slli_si128 (__m128i __A, int __B)
+{
+ return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8));
+}
+#else
+/* APPLE LOCAL begin 5919583 */
+#define _mm_srli_si128 (__m128i)__builtin_ia32_psrldqi128_byteshift
+#define _mm_slli_si128 (__m128i)__builtin_ia32_pslldqi128_byteshift
+/* APPLE LOCAL end 5919583 */
+#endif
+
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+}
+#else
+#define _mm_srli_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_psrlwi128 ((__v8hi)(__A), __B))
+#define _mm_srli_epi32(__A, __B) \
+ ((__m128i)__builtin_ia32_psrldi128 ((__v4si)(__A), __B))
+#define _mm_srli_epi64(__A, __B) \
+ ((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), __B))
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
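+/* _mm_sll_epi64: the shift count is taken from the low 64 bits of __B; counts above 63 yield zero. */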
+_mm_sll_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sra_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sra_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srl_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srl_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srl_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_and_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_andnot_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_or_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_xor_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i 
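/* cmplt has no instruction of its own; it is pcmpgt with the operands swapped */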
__attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); +} + +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_extract_epi16 (__m128i const __A, int const __N) +{ + return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_insert_epi16 (__m128i const __A, int const __D, int const __N) +{ + return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); +} +#else +#define _mm_extract_epi16(A, N) \ + ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N))) +#define _mm_insert_epi16(A, D, N) \ + ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N))) +#endif + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movemask_epi8 (__m128i __A) +{ + return __builtin_ia32_pmovmskb128 ((__v16qi)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mulhi_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin 5814283 */ +#define 
_mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__A), __B)) +#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__A), __B)) +#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)(__A), __B)) +/* APPLE LOCAL end 5814283 */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) +{ + __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_avg_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_avg_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sad_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_stream_si32 (int *__A, int __B) +{ + __builtin_ia32_movnti (__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_stream_si128 (__m128i *__A, __m128i __B) +{ + __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_stream_pd (double *__A, __m128d __B) +{ + __builtin_ia32_movntpd (__A, (__v2df)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_clflush (void const *__A) +{ + __builtin_ia32_clflush (__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_lfence (void) +{ + __builtin_ia32_lfence (); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mfence (void) +{ + __builtin_ia32_mfence (); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi32_si128 (int __A) +{ + return _mm_set_epi32 (0, 0, 0, __A); +} + +#ifdef __x86_64__ +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64_si128 (long long __A) +{ + return _mm_set_epi64x (0, __A); +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64x_si128 (long long __A) +{ + return _mm_set_epi64x (0, __A); +} +#endif + +/* Casts between various SP, DP, INT vector types. Note that these do no + conversion of values, they just change the type. 
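   For example (a minimal usage sketch, not from the original header;
   negate_ps is a hypothetical helper name), the sign bit of each float
   can be flipped by viewing the vector as integers:

     __m128 negate_ps (__m128 __X)
     {
       return _mm_castsi128_ps (_mm_xor_si128 (_mm_castps_si128 (__X),
                                               _mm_set1_epi32 (0x80000000)));
     }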
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_castpd_ps(__m128d __A) +{ + return (__m128) __A; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_castpd_si128(__m128d __A) +{ + return (__m128i) __A; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_castps_pd(__m128 __A) +{ + return (__m128d) __A; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_castps_si128(__m128 __A) +{ + return (__m128i) __A; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_castsi128_ps(__m128i __A) +{ + return (__m128) __A; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_castsi128_pd(__m128i __A) +{ + return (__m128d) __A; +} +/* APPLE LOCAL end radar 4152603 */ + +/* APPLE LOCAL begin 4220129, 4286110 */ +/* Set all of the elements of the vector to A. */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_epi64x (long long __A) +{ + return _mm_set_epi64x (__A, __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_epi64 (__m64 __A) +{ + return _mm_set_epi64 (__A, __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_epi32 (int __A) +{ + return _mm_set_epi32 (__A, __A, __A, __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_epi16 (short __A) +{ + __m128i temp, temp2, temp3; + temp = _mm_cvtsi32_si128((int)__A); + temp2 = _mm_unpacklo_epi16(temp, temp); + temp3 = _mm_shuffle_epi32(temp2, 0); + return temp3; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_epi8 (char __A) +{ + __m128i temp, temp2, temp3, temp4; + temp = _mm_cvtsi32_si128 ((int)__A); + temp2 = _mm_unpacklo_epi8 (temp, temp); + temp3 = _mm_unpacklo_epi8 (temp2, temp2); + temp4 = _mm_shuffle_epi32 (temp3, 0); + return temp4; +} +/* APPLE LOCAL end 4220129, 4286110 */ + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#undef __always_inline__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +#endif /* __SSE2__ */ + +#endif /* _EMMINTRIN_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h b/gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h new file mode 100644 index 000000000..20d7f5e04 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h @@ -0,0 +1,77 @@ +/* Copyright (C) 2004 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+#include <errno.h>
+
+static __inline__ void*
+_mm_malloc (size_t size, size_t align)
+{
+ void * malloc_ptr;
+ void * aligned_ptr;
+
+ /* Error if align is not a power of two. */
+ if (align & (align - 1))
+ {
+ errno = EINVAL;
+ return ((void*) 0);
+ }
+
+ if (size == 0)
+ return ((void *) 0);
+
+ /* Assume malloc'd pointer is aligned at least to sizeof (void*).
+ If necessary, add another sizeof (void*) to store the value
+ returned by malloc. Effectively this enforces a minimum alignment
+ of 2 * sizeof (void *). */
+ if (align < 2 * sizeof (void *))
+ align = 2 * sizeof (void *);
+
+ malloc_ptr = malloc (size + align);
+ if (!malloc_ptr)
+ return ((void *) 0);
+
+ /* Align: round up to the requested boundary. We have at least
+ sizeof (void *) bytes of space below the aligned pointer. */
+ aligned_ptr = (void *) (((size_t) malloc_ptr + align)
+ & ~((size_t) (align) - 1));
+
+ /* Store the original pointer just before the aligned pointer. */
+ ((void **) aligned_ptr) [-1] = malloc_ptr;
+
+ return aligned_ptr;
+}
+
+static __inline__ void
+_mm_free (void * aligned_ptr)
+{
+ if (aligned_ptr)
+ free (((void **) aligned_ptr) [-1]);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h b/gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h new file mode 100644 index 000000000..e9a621871 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h @@ -0,0 +1,7 @@
+/* We do not want to output SDB debugging information. */
+
+#undef SDB_DEBUGGING_INFO
+
+/* We want to output DBX debugging information. */
+
+#define DBX_DEBUGGING_INFO 1 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c b/gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c new file mode 100644 index 000000000..3025bdae6 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c @@ -0,0 +1,31 @@
+/* i386-darwin host-specific hook definitions.
+ Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for x86 hosts; this file exists just
+ to include config/host-darwin.h. */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER; diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def b/gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def new file mode 100644 index 000000000..3cb4cb1b8 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def @@ -0,0 +1,97 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* The x86_64 ABI specifies both XF and TF modes.
+ XFmode is __float80, the IEEE extended format; TFmode is __float128,
+ the IEEE quad format. */
+
+FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* In ILP32 mode, XFmode has size 12 and alignment 4.
+ In LP64 mode, XFmode has size and alignment 16. */
+ADJUST_FLOAT_FORMAT (XF, (TARGET_128BIT_LONG_DOUBLE
+ ? &ieee_extended_intel_128_format
+ : TARGET_96_ROUND_53_LONG_DOUBLE
+ ? &ieee_extended_intel_96_round_53_format
+ : &ieee_extended_intel_96_format));
+ADJUST_BYTESIZE (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 12);
+ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
+
+/* Add any extra modes needed to represent the condition code.
+
+ For the i386, we need separate modes when floating-point
+ equality comparisons are being done.
+
+ Add CCNO to indicate comparisons against zero that require
+ the Overflow flag to be unset. The sign-bit test is used instead,
+ and thus can be used to form "a&b>0" style tests.
+
+ Add CCGC to indicate comparisons against zero that allow
+ unspecified garbage in the Carry flag. This mode is used
+ by inc/dec instructions.
+
+ Add CCGOC to indicate comparisons against zero that allow
+ unspecified garbage in the Carry and Overflow flags. This
+ mode is used to simulate comparisons of (a-b) and (a+b)
+ against zero using sub/cmp/add operations.
+
+ APPLE LOCAL begin 5612787 mainline sse4
+ Add CCA to indicate that only the Above flag is valid.
+ Add CCC to indicate that only the Carry flag is valid.
+ Add CCO to indicate that only the Overflow flag is valid.
+ Add CCS to indicate that only the Sign flag is valid.
+ APPLE LOCAL end 5612787 mainline sse4
+ Add CCZ to indicate that only the Zero flag is valid. */
+
+CC_MODE (CCGC);
+CC_MODE (CCGOC);
+CC_MODE (CCNO);
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+CC_MODE (CCA);
+CC_MODE (CCC);
+CC_MODE (CCO);
+CC_MODE (CCS);
+/* APPLE LOCAL end 5612787 mainline sse4 */
+CC_MODE (CCZ);
+CC_MODE (CCFP);
+CC_MODE (CCFPU);
+
+/* Vector modes.
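   VECTOR_MODES (CLASS, WIDTH) creates every vector mode of class CLASS
   whose total size is WIDTH bytes, and VECTOR_MODE (CLASS, MODE, COUNT)
   creates the single mode with COUNT elements of scalar mode MODE; the
   comment on each line below lists the modes it produces.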
*/
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+/* APPLE LOCAL 5612787 mainline sse4 */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+/* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+VECTOR_MODE (INT, DI, 1); /* V1DI (__m64) */
+VECTOR_MODE (INT, SI, 1); /* V1SI */
+/* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+/* The symbol Pmode stands for one of the above machine modes (usually SImode).
+ The tm.h file specifies which one. It is not a distinct mode. */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h b/gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h new file mode 100644 index 000000000..f92428e55 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h @@ -0,0 +1,261 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.
*/ + +/* Functions in i386.c */ +extern void override_options (void); +extern void optimization_options (int, int); + +extern int ix86_can_use_return_insn_p (void); +extern int ix86_frame_pointer_required (void); +extern void ix86_setup_frame_addresses (void); + +extern void ix86_file_end (void); +extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int); +extern void ix86_expand_prologue (void); +extern void ix86_expand_epilogue (int); + +extern void ix86_output_addr_vec_elt (FILE *, int); +extern void ix86_output_addr_diff_elt (FILE *, int, int); + +#ifdef RTX_CODE +extern int ix86_aligned_p (rtx); + +extern int standard_80387_constant_p (rtx); +extern const char *standard_80387_constant_opcode (rtx); +extern rtx standard_80387_constant_rtx (int); +extern int standard_sse_constant_p (rtx); +extern const char *standard_sse_constant_opcode (rtx, rtx); +extern int symbolic_reference_mentioned_p (rtx); +extern bool extended_reg_mentioned_p (rtx); +extern bool x86_extended_QIreg_mentioned_p (rtx); +extern bool x86_extended_reg_mentioned_p (rtx); +extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); + +extern int ix86_expand_movmem (rtx, rtx, rtx, rtx); +extern int ix86_expand_clrmem (rtx, rtx, rtx); +extern int ix86_expand_strlen (rtx, rtx, rtx, rtx); + +extern bool legitimate_constant_p (rtx); +extern bool constant_address_p (rtx); +extern bool legitimate_pic_operand_p (rtx); +extern int legitimate_pic_address_disp_p (rtx); +extern int legitimate_address_p (enum machine_mode, rtx, int); +extern rtx legitimize_address (rtx, rtx, enum machine_mode); + +extern void print_reg (rtx, int, FILE*); +extern void print_operand (FILE*, rtx, int); +extern void print_operand_address (FILE*, rtx); +extern bool output_addr_const_extra (FILE*, rtx); + +extern void split_di (rtx[], int, rtx[], rtx[]); +extern void split_ti (rtx[], int, rtx[], rtx[]); + +extern const char *output_set_got (rtx, rtx); +extern const char *output_387_binary_op (rtx, rtx*); +extern const char *output_387_reg_move (rtx, rtx*); +extern const char *output_fix_trunc (rtx, rtx*, int); +extern const char *output_fp_compare (rtx, rtx*, int, int); + +extern void ix86_expand_clear (rtx); +extern void ix86_expand_move (enum machine_mode, rtx[]); +extern void ix86_expand_vector_move (enum machine_mode, rtx[]); +extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]); +extern void ix86_expand_push (enum machine_mode, rtx); +extern rtx ix86_fixup_binary_operands (enum rtx_code, + enum machine_mode, rtx[]); +extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, + enum machine_mode, rtx[]); +extern void ix86_expand_binary_operator (enum rtx_code, + enum machine_mode, rtx[]); +extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); +extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, + rtx[]); +/* APPLE LOCAL begin 4176531 4424891 */ +extern const char *ix86_expand_convert_uns_DF2SI_sse (rtx *); +extern const char *ix86_expand_convert_uns_SF2SI_sse (rtx *); +extern const char *ix86_expand_convert_uns_DI2DF_sse (rtx *); +extern const char *ix86_expand_convert_uns_SI2DF_sse (rtx *); +extern const char *ix86_expand_convert_sign_DI2DF_sse (rtx *); +/* APPLE LOCAL end 4176531 4424891 */ +extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool); +extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode, + rtx[]); +extern void ix86_expand_copysign (rtx []); +extern void ix86_split_copysign_const (rtx []); +extern void 
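/* variable-sign counterpart of ix86_split_copysign_const */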
ix86_split_copysign_var (rtx []); +extern int ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); +extern int ix86_match_ccmode (rtx, enum machine_mode); +extern rtx ix86_expand_compare (enum rtx_code, rtx *, rtx *); +extern int ix86_use_fcomi_compare (enum rtx_code); +extern void ix86_expand_branch (enum rtx_code, rtx); +extern int ix86_expand_setcc (enum rtx_code, rtx); +extern int ix86_expand_int_movcc (rtx[]); +extern int ix86_expand_fp_movcc (rtx[]); +extern bool ix86_expand_fp_vcond (rtx[]); +extern bool ix86_expand_int_vcond (rtx[]); +/* APPLE LOCAL begin 5612787 mainline sse4 */ +extern void ix86_expand_sse_unpack (rtx[], bool, bool); +extern void ix86_expand_sse4_unpack (rtx[], bool, bool); +/* APPLE LOCAL end 5612787 mainline sse4 */ +extern int ix86_expand_int_addcc (rtx[]); +extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); +extern void x86_initialize_trampoline (rtx, rtx, rtx); +extern rtx ix86_zero_extend_to_Pmode (rtx); +extern void ix86_split_long_move (rtx[]); +extern void ix86_split_ashl (rtx *, rtx, enum machine_mode); +extern void ix86_split_ashr (rtx *, rtx, enum machine_mode); +extern void ix86_split_lshr (rtx *, rtx, enum machine_mode); +extern rtx ix86_find_base_term (rtx); +extern int ix86_check_movabs (rtx, int); + +extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot); +extern int ix86_attr_length_immediate_default (rtx, int); +extern int ix86_attr_length_address_default (rtx); + +extern enum machine_mode ix86_fp_compare_mode (enum rtx_code); + +extern rtx ix86_libcall_value (enum machine_mode); +extern bool ix86_function_value_regno_p (int); +extern bool ix86_function_arg_regno_p (int); +extern int ix86_function_arg_boundary (enum machine_mode, tree); +extern int ix86_return_in_memory (tree); +/* APPLE LOCAL radar 4781080 */ +extern bool ix86_objc_fpreturn_msgcall (tree, bool); +extern void ix86_va_start (tree, rtx); +extern rtx ix86_va_arg (tree, tree); + +extern rtx ix86_force_to_memory (enum machine_mode, rtx); +extern void ix86_free_from_memory (enum machine_mode); +extern void ix86_split_fp_branch (enum rtx_code code, rtx, rtx, + rtx, rtx, rtx, rtx); +extern bool ix86_hard_regno_mode_ok (int, enum machine_mode); +extern bool ix86_modes_tieable_p (enum machine_mode, enum machine_mode); +extern int ix86_register_move_cost (enum machine_mode, enum reg_class, + enum reg_class); +extern int ix86_secondary_memory_needed (enum reg_class, enum reg_class, + enum machine_mode, int); +extern bool ix86_cannot_change_mode_class (enum machine_mode, + enum machine_mode, enum reg_class); +extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class); +extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class); +extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int); +extern int ix86_mode_needed (int, rtx); +extern void emit_i387_cw_initialization (int); +extern bool ix86_fp_jump_nontrivial_p (enum rtx_code); +extern void x86_order_regs_for_local_alloc (void); +extern void x86_function_profiler (FILE *, int); +extern void x86_emit_floatuns (rtx [2]); +extern void ix86_emit_fp_unordered_jump (rtx); + +extern void ix86_emit_i387_log1p (rtx, rtx); + +extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode); + +#ifdef TREE_CODE +extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); +extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int); +extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + tree, int); 
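+/* Compute where a function's return value is passed back (register or memory). */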
+extern rtx ix86_function_value (tree, tree, bool); +#endif + +#endif + +#ifdef TREE_CODE +extern int ix86_return_pops_args (tree, tree, int); + +extern int ix86_data_alignment (tree, int); +extern int ix86_local_alignment (tree, int); +extern int ix86_constant_alignment (tree, int); +extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *); +extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *); + +extern unsigned int i386_pe_section_type_flags (tree, const char *, int); +extern void i386_pe_asm_named_section (const char *, unsigned int, tree); +extern int x86_field_alignment (tree, int); +#endif + +extern rtx ix86_tls_get_addr (void); +extern rtx ix86_tls_module_base (void); + +extern void ix86_expand_vector_init (bool, rtx, rtx); +extern void ix86_expand_vector_set (bool, rtx, rtx, int); +extern void ix86_expand_vector_extract (bool, rtx, rtx, int); +extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); + +/* In winnt.c */ +extern int i386_pe_dllexport_name_p (const char *); +extern int i386_pe_dllimport_name_p (const char *); +extern void i386_pe_unique_section (tree, int); +extern void i386_pe_declare_function_type (FILE *, const char *, int); +extern void i386_pe_record_external_function (tree, const char *); +extern void i386_pe_record_exported_symbol (const char *, int); +extern void i386_pe_asm_file_end (FILE *); +extern void i386_pe_encode_section_info (tree, rtx, int); +extern const char *i386_pe_strip_name_encoding (const char *); +extern const char *i386_pe_strip_name_encoding_full (const char *); +extern void i386_pe_output_labelref (FILE *, const char *); +extern bool i386_pe_valid_dllimport_attribute_p (tree); + +/* In winnt-cxx.c and winnt-stubs.c */ +extern void i386_pe_adjust_class_at_definition (tree); +extern bool i386_pe_type_dllimport_p (tree); +extern bool i386_pe_type_dllexport_p (tree); + +extern rtx maybe_get_pool_constant (rtx); + +extern char internal_label_prefix[16]; +extern int internal_label_prefix_len; + +enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS }; +struct ix86_address +{ + rtx base, index, disp; + HOST_WIDE_INT scale; + enum ix86_address_seg seg; +}; + +extern int ix86_decompose_address (rtx, struct ix86_address *); +extern int memory_address_length (rtx addr); +extern void x86_output_aligned_bss (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, int); +extern void x86_elf_aligned_common (FILE *, const char *, + unsigned HOST_WIDE_INT, int); + +#ifdef RTX_CODE +extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *, + enum rtx_code *, enum rtx_code *); +extern enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code); +#endif + +/* APPLE LOCAL begin CW asm blocks */ +extern const char *i386_iasm_register_name (const char *regname, char *buf); +extern bool iasm_x86_needs_swapping (const char *); +extern bool iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses, + bool must_be_reg, bool must_not_be_reg, void *); +extern void iasm_x86_print_prefix (char *buf, tree prefix_list); +extern tree iasm_raise_reg (tree); +/* APPLE LOCAL end CW asm blocks */ + +/* APPLE LOCAL 3399553 */ +extern void ix86_expand_flt_rounds (rtx); +extern int asm_preferred_eh_data_format (int, int); diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.c b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c new file mode 100644 index 000000000..0e212967a --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c @@ -0,0 +1,23515 @@ +/* Subroutines used for code generation on IA-32. 
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-codes.h" +#include "insn-attr.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "toplev.h" +#include "basic-block.h" +#include "ggc.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" +#include "cgraph.h" +#include "tree-gimple.h" +#include "dwarf2.h" +#include "tm-constrs.h" + +/* APPLE LOCAL begin pascal strings */ +#include "../../libcpp/internal.h" +extern struct cpp_reader* parse_in; +/* APPLE LOCAL end pascal strings */ +/* APPLE LOCAL begin regparmandstackparm */ +#include "integrate.h" +#include "tree-inline.h" +#include "splay-tree.h" +#include "tree-pass.h" +#include "c-tree.h" +#include "c-common.h" +/* APPLE LOCAL end regparmandstackparm */ +/* APPLE LOCAL begin dwarf call/pop 5221468 */ +#include "debug.h" +#include "dwarf2out.h" +/* APPLE LOCAL end dwarf call/pop 5221468 */ + +#ifndef CHECK_STACK_LIMIT +#define CHECK_STACK_LIMIT (-1) +#endif + +/* Return index of given mode in mult and division cost tables. */ +#define MODE_INDEX(mode) \ + ((mode) == QImode ? 0 \ + : (mode) == HImode ? 1 \ + : (mode) == SImode ? 2 \ + : (mode) == DImode ? 3 \ + : 4) + +/* Processor costs (relative to an add) */ +/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ +#define COSTS_N_BYTES(N) ((N) * 2) + +static const +struct processor_costs size_cost = { /* costs for tuning for size */ + COSTS_N_BYTES (2), /* cost of an add instruction */ + COSTS_N_BYTES (3), /* cost of a lea instruction */ + COSTS_N_BYTES (2), /* variable shift costs */ + COSTS_N_BYTES (3), /* constant shift costs */ + {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ + COSTS_N_BYTES (3), /* HI */ + COSTS_N_BYTES (3), /* SI */ + COSTS_N_BYTES (3), /* DI */ + COSTS_N_BYTES (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ + COSTS_N_BYTES (3), /* HI */ + COSTS_N_BYTES (3), /* SI */ + COSTS_N_BYTES (3), /* DI */ + COSTS_N_BYTES (5)}, /* other */ + COSTS_N_BYTES (3), /* cost of movsx */ + COSTS_N_BYTES (3), /* cost of movzx */ + 0, /* "large" insn */ + 2, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {2, 2, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
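   A value of 2 is thus the same cost as a register-to-register
   move; larger values scale up from that baseline.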
*/ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 2}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {2, 2, 2}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 3, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {3, 3}, /* cost of storing MMX registers + in SImode and DImode */ + 3, /* cost of moving SSE register */ + {3, 3, 3}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {3, 3, 3}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ + COSTS_N_BYTES (2), /* cost of FMUL instruction. */ + COSTS_N_BYTES (2), /* cost of FDIV instruction. */ + COSTS_N_BYTES (2), /* cost of FABS instruction. */ + COSTS_N_BYTES (2), /* cost of FCHS instruction. */ + COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ +}; + +/* Processor costs (relative to an add) */ +static const +struct processor_costs i386_cost = { /* 386 specific costs */ + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (3), /* variable shift costs */ + COSTS_N_INSNS (2), /* constant shift costs */ + {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ + COSTS_N_INSNS (6), /* HI */ + COSTS_N_INSNS (6), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + COSTS_N_INSNS (1), /* cost of multiply per each bit set */ + {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (23), /* HI */ + COSTS_N_INSNS (23), /* SI */ + COSTS_N_INSNS (23), /* DI */ + COSTS_N_INSNS (23)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 15, /* "large" insn */ + 3, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {8, 8, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (27), /* cost of FMUL instruction. */ + COSTS_N_INSNS (88), /* cost of FDIV instruction. */ + COSTS_N_INSNS (22), /* cost of FABS instruction. */ + COSTS_N_INSNS (24), /* cost of FCHS instruction. */ + COSTS_N_INSNS (122), /* cost of FSQRT instruction. 
*/ +}; + +static const +struct processor_costs i486_cost = { /* 486 specific costs */ + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (3), /* variable shift costs */ + COSTS_N_INSNS (2), /* constant shift costs */ + {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ + COSTS_N_INSNS (12), /* HI */ + COSTS_N_INSNS (12), /* SI */ + COSTS_N_INSNS (12), /* DI */ + COSTS_N_INSNS (12)}, /* other */ + 1, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (40), /* HI */ + COSTS_N_INSNS (40), /* SI */ + COSTS_N_INSNS (40), /* DI */ + COSTS_N_INSNS (40)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 15, /* "large" insn */ + 3, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {8, 8, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (16), /* cost of FMUL instruction. */ + COSTS_N_INSNS (73), /* cost of FDIV instruction. */ + COSTS_N_INSNS (3), /* cost of FABS instruction. */ + COSTS_N_INSNS (3), /* cost of FCHS instruction. */ + COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ +}; + +static const +struct processor_costs pentium_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (4), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ + COSTS_N_INSNS (11), /* HI */ + COSTS_N_INSNS (11), /* SI */ + COSTS_N_INSNS (11), /* DI */ + COSTS_N_INSNS (11)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (25), /* HI */ + COSTS_N_INSNS (25), /* SI */ + COSTS_N_INSNS (25), /* DI */ + COSTS_N_INSNS (25)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 8, /* "large" insn */ + 6, /* MOVE_RATIO */ + 6, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 8, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (3), /* cost of FMUL instruction. */ + COSTS_N_INSNS (39), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ +}; + +static const +struct processor_costs pentiumpro_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (4)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (17), /* HI */ + COSTS_N_INSNS (17), /* SI */ + COSTS_N_INSNS (17), /* DI */ + COSTS_N_INSNS (17)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 6, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {2, 2, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 32, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + COSTS_N_INSNS (56), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ +}; + +static const +struct processor_costs k6_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (3), /* DI */ + COSTS_N_INSNS (3)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (18), /* HI */ + COSTS_N_INSNS (18), /* SI */ + COSTS_N_INSNS (18), /* DI */ + COSTS_N_INSNS (18)}, /* other */ + COSTS_N_INSNS (2), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 8, /* "large" insn */ + 4, /* MOVE_RATIO */ + 3, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 3, 2}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {2, 2, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 6, /* MMX or SSE register to integer */ + 32, /* size of prefetch block */ + 1, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (2), /* cost of FMUL instruction. */ + COSTS_N_INSNS (56), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ +}; + +static const +struct processor_costs athlon_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ + COSTS_N_INSNS (5), /* HI */ + COSTS_N_INSNS (5), /* SI */ + COSTS_N_INSNS (5), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 6}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 5, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (24), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ +}; + +static const +struct processor_costs k8_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 3, 6}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 5, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (19), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ +}; + +static const +struct processor_costs pentium4_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (3), /* cost of a lea instruction */ + COSTS_N_INSNS (4), /* variable shift costs */ + COSTS_N_INSNS (4), /* constant shift costs */ + {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ + COSTS_N_INSNS (15), /* HI */ + COSTS_N_INSNS (15), /* SI */ + COSTS_N_INSNS (15), /* DI */ + COSTS_N_INSNS (15)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (56), /* HI */ + COSTS_N_INSNS (56), /* SI */ + COSTS_N_INSNS (56), /* DI */ + COSTS_N_INSNS (56)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 16, /* "large" insn */ + 6, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 3, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 12, /* cost of moving SSE register */ + {12, 12, 12}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 10, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (7), /* cost of FMUL instruction. */ + COSTS_N_INSNS (43), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ +}; + +static const +struct processor_costs nocona_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ + COSTS_N_INSNS (10), /* HI */ + COSTS_N_INSNS (10), /* SI */ + COSTS_N_INSNS (10), /* DI */ + COSTS_N_INSNS (10)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (66), /* HI */ + COSTS_N_INSNS (66), /* SI */ + COSTS_N_INSNS (66), /* DI */ + COSTS_N_INSNS (66)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 16, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/
+  {4, 4, 4},			/* cost of storing integer registers */
+  3,				/* cost of reg,reg fld/fst */
+  {12, 12, 12},		/* cost of loading fp registers
+				   in SFmode, DFmode and XFmode */
+  {4, 4, 4},			/* cost of storing fp registers
+				   in SFmode, DFmode and XFmode */
+  6,				/* cost of moving MMX register */
+  {12, 12},			/* cost of loading MMX registers
+				   in SImode and DImode */
+  {12, 12},			/* cost of storing MMX registers
+				   in SImode and DImode */
+  6,				/* cost of moving SSE register */
+  {12, 12, 12},		/* cost of loading SSE registers
+				   in SImode, DImode and TImode */
+  {12, 12, 12},		/* cost of storing SSE registers
+				   in SImode, DImode and TImode */
+  8,				/* MMX or SSE register to integer */
+  128,				/* size of prefetch block */
+  8,				/* number of parallel prefetches */
+  1,				/* Branch cost */
+  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
+  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
+};
+/* APPLE LOCAL begin mainline */
+static const
+struct processor_costs core2_cost = {
+  COSTS_N_INSNS (1),		/* cost of an add instruction */
+  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
+  COSTS_N_INSNS (1),		/* variable shift costs */
+  COSTS_N_INSNS (1),		/* constant shift costs */
+  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
+   COSTS_N_INSNS (3),		/* HI */
+   COSTS_N_INSNS (3),		/* SI */
+   COSTS_N_INSNS (3),		/* DI */
+   COSTS_N_INSNS (3)},		/* other */
+  0,				/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (22),		/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (22),		/* HI */
+   COSTS_N_INSNS (22),		/* SI */
+   COSTS_N_INSNS (22),		/* DI */
+   COSTS_N_INSNS (22)},	/* other */
+  COSTS_N_INSNS (1),		/* cost of movsx */
+  COSTS_N_INSNS (1),		/* cost of movzx */
+  8,				/* "large" insn */
+  16,				/* MOVE_RATIO */
+  2,				/* cost for loading QImode using movzbl */
+  {6, 6, 6},			/* cost of loading integer registers
+				   in QImode, HImode and SImode.
+				   Relative to reg-reg move (2).  */
+  {4, 4, 4},			/* cost of storing integer registers */
+  2,				/* cost of reg,reg fld/fst */
+  {6, 6, 6},			/* cost of loading fp registers
+				   in SFmode, DFmode and XFmode */
+  {4, 4, 4},			/* cost of storing fp registers
+				   in SFmode, DFmode and XFmode */
+  2,				/* cost of moving MMX register */
+  {6, 6},			/* cost of loading MMX registers
+				   in SImode and DImode */
+  {4, 4},			/* cost of storing MMX registers
+				   in SImode and DImode */
+  2,				/* cost of moving SSE register */
+  {6, 6, 6},			/* cost of loading SSE registers
+				   in SImode, DImode and TImode */
+  {4, 4, 4},			/* cost of storing SSE registers
+				   in SImode, DImode and TImode */
+  2,				/* MMX or SSE register to integer */
+  128,				/* size of prefetch block */
+  8,				/* number of parallel prefetches */
+  3,				/* Branch cost */
+  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (32),		/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
+  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (58),		/* cost of FSQRT instruction.  */
+};
+/* APPLE LOCAL end mainline */
+/* Generic64 should produce code tuned for Nocona and K8.  */
+static const
+struct processor_costs generic64_cost = {
+  COSTS_N_INSNS (1),		/* cost of an add instruction */
+  /* On all chips taken into consideration lea is 2 cycles or more.
+     With this cost, however, our current implementation of synth_mult
+     results in the use of unnecessary temporary registers, causing
+     regressions on several SPECfp benchmarks.  */
+  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
+  COSTS_N_INSNS (1),		/* variable shift costs */
+  COSTS_N_INSNS (1),		/* constant shift costs */
+  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),		/* HI */
+   COSTS_N_INSNS (3),		/* SI */
+   COSTS_N_INSNS (4),		/* DI */
+   COSTS_N_INSNS (2)},		/* other */
+  0,				/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),		/* HI */
+   COSTS_N_INSNS (42),		/* SI */
+   COSTS_N_INSNS (74),		/* DI */
+   COSTS_N_INSNS (74)},	/* other */
+  COSTS_N_INSNS (1),		/* cost of movsx */
+  COSTS_N_INSNS (1),		/* cost of movzx */
+  8,				/* "large" insn */
+  17,				/* MOVE_RATIO */
+  4,				/* cost for loading QImode using movzbl */
+  {4, 4, 4},			/* cost of loading integer registers
+				   in QImode, HImode and SImode.
+				   Relative to reg-reg move (2).  */
+  {4, 4, 4},			/* cost of storing integer registers */
+  4,				/* cost of reg,reg fld/fst */
+  {12, 12, 12},		/* cost of loading fp registers
+				   in SFmode, DFmode and XFmode */
+  {6, 6, 8},			/* cost of storing fp registers
+				   in SFmode, DFmode and XFmode */
+  2,				/* cost of moving MMX register */
+  {8, 8},			/* cost of loading MMX registers
+				   in SImode and DImode */
+  {8, 8},			/* cost of storing MMX registers
+				   in SImode and DImode */
+  2,				/* cost of moving SSE register */
+  {8, 8, 8},			/* cost of loading SSE registers
+				   in SImode, DImode and TImode */
+  {8, 8, 8},			/* cost of storing SSE registers
+				   in SImode, DImode and TImode */
+  5,				/* MMX or SSE register to integer */
+  64,				/* size of prefetch block */
+  6,				/* number of parallel prefetches */
+  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
+     this value is increased to the perhaps more appropriate value of 5.  */
+  3,				/* Branch cost */
+  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
+  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
+};
+
+/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
+   and K8.  */
+static const
+struct processor_costs generic32_cost = {
+  COSTS_N_INSNS (1),		/* cost of an add instruction */
+  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
+  COSTS_N_INSNS (1),		/* variable shift costs */
+  COSTS_N_INSNS (1),		/* constant shift costs */
+  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),		/* HI */
+   COSTS_N_INSNS (3),		/* SI */
+   COSTS_N_INSNS (4),		/* DI */
+   COSTS_N_INSNS (2)},		/* other */
+  0,				/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),		/* HI */
+   COSTS_N_INSNS (42),		/* SI */
+   COSTS_N_INSNS (74),		/* DI */
+   COSTS_N_INSNS (74)},	/* other */
+  COSTS_N_INSNS (1),		/* cost of movsx */
+  COSTS_N_INSNS (1),		/* cost of movzx */
+  8,				/* "large" insn */
+  17,				/* MOVE_RATIO */
+  4,				/* cost for loading QImode using movzbl */
+  {4, 4, 4},			/* cost of loading integer registers
+				   in QImode, HImode and SImode.
+				   Relative to reg-reg move (2).  */
+  {4, 4, 4},			/* cost of storing integer registers */
+  4,				/* cost of reg,reg fld/fst */
+  {12, 12, 12},		/* cost of loading fp registers
+				   in SFmode, DFmode and XFmode */
+  {6, 6, 8},			/* cost of storing fp registers
+				   in SFmode, DFmode and XFmode */
+  2,				/* cost of moving MMX register */
+  {8, 8},			/* cost of loading MMX registers
+				   in SImode and DImode */
+  {8, 8},			/* cost of storing MMX registers
+				   in SImode and DImode */
+  2,				/* cost of moving SSE register */
+  {8, 8, 8},			/* cost of loading SSE registers
+				   in SImode, DImode and TImode */
+  {8, 8, 8},			/* cost of storing SSE registers
+				   in SImode, DImode and TImode */
+  5,				/* MMX or SSE register to integer */
+  64,				/* size of prefetch block */
+  6,				/* number of parallel prefetches */
+  3,				/* Branch cost */
+  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
+  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
+};
+
+const struct processor_costs *ix86_cost = &pentium_cost;
+
+/* Processor feature/optimization bitmasks.  */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+/* APPLE LOCAL mainline */
+#define m_CORE2 (1<<PROCESSOR_CORE2)
+#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
+#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
+#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
+
+/* Generic instruction choice should be a common subset of the supported
+   CPUs (PPro/PENT4/NOCONA/Athlon/K8).  */
+
+/* Using the leave instruction does not affect Nocona SPEC2000 results
+   negatively, so enabling it for Generic64 seems like a good code size
+   tradeoff.  We can't enable it for 32bit generic because it does not work
+   well with PPro base chips.  */
+/* APPLE LOCAL begin mainline */
+const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
+const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_zero_extend_with_and = m_486 | m_PENT;
+const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC /* m_386 | m_K6 */;
+const int x86_double_with_add = ~m_386;
+const int x86_use_bit_test = m_386;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
+const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
+const int x86_3dnow_a = m_ATHLON_K8;
+/* APPLE LOCAL end mainline */
+const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
+/* Branch hints were put in P4 based on simulation results.  But after P4
+   was made, no performance benefit was observed with branch hints; they
+   also increase the code size.  As a result, icc never generates branch
+   hints.  */
+const int x86_branch_hints = 0;
+const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
+/* We probably ought to watch for partial register stalls on the Generic32
+   compilation setting as well.
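+   (A partial register stall, for illustration: a sequence such as
+
+	movb	$1, %al
+	addl	%eax, %edx
+
+   stalls on PPro-class chips, because the read of %eax must wait for the
+   partial write to %al to be merged with the rest of the register.)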
+   However, in the current implementation partial register stalls are not
+   eliminated very well - they can be introduced via subregs synthesized by
+   combine and can happen in caller/callee saving sequences.  Because this
+   option pays back little on PPro based chips and conflicts with the
+   partial register dependencies used by Athlon/P4 based chips, it is
+   better to leave it off for generic32 for now.  */
+const int x86_partial_reg_stall = m_PPRO;
+/* APPLE LOCAL begin mainline */
+const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
+const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
+const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
+const int x86_use_mov0 = m_K6;
+const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
+const int x86_read_modify_write = ~m_PENT;
+const int x86_read_modify = ~(m_PENT | m_PPRO);
+const int x86_split_long_moves = m_PPRO;
+const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
+const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
+const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
+const int x86_qimode_math = ~(0);
+const int x86_promote_qi_regs = 0;
+/* On PPro this flag is meant to avoid partial register stalls.  Just like
+   x86_partial_reg_stall, this option might be considered for Generic32 if
+   our scheme for avoiding partial stalls were more effective.  */
+const int x86_himode_math = ~(m_PPRO);
+const int x86_promote_hi_regs = m_PPRO;
+const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC);
+const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_shift1 = ~m_486;
+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+/* In the Generic model we have a conflict between PPro/Pentium4 based
+   chips, which treat 128bit SSE registers as single units, and K8 based
+   chips, which divide SSE registers into two 64bit halves.
+   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
+   to allow register renaming on 128bit SSE units, but this usually results
+   in one extra microop on 64bit SSE units.  Experimental results show that
+   disabling this option on P4 brings over a 20% SPECfp regression, while
+   enabling it on K8 brings a roughly 2.4% regression that can be partly
+   masked by careful scheduling of moves.
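+
+   Like the rest of the x86_* masks above, this one is consulted by testing
+   the tuning bit of the active CPU; roughly (a sketch, see the TUNEMASK
+   tests used in override_options below):
+
+	if (x86_sse_partial_reg_dependency & (1 << ix86_tune))
+	  ...emit the full 128bit move...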
+   */
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+/* Set for machines where the type and dependencies are resolved on SSE
+   register parts instead of whole registers, so we may maintain just the
+   lower part of scalar values in the proper format, leaving the upper part
+   undefined.  */
+const int x86_sse_split_regs = m_ATHLON_K8;
+const int x86_sse_typeless_stores = m_ATHLON_K8;
+const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
+const int x86_use_ffreep = m_ATHLON_K8;
+const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
+const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
+
+/* ??? Allowing interunit moves makes it all too easy for the compiler to
+   put integer data in xmm registers, which results in pretty abysmal
+   code.  */
+/* APPLE LOCAL 5612787 mainline sse4 */
+const int x86_inter_unit_moves = ~(m_ATHLON_K8);
+
+const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
+/* Some CPU cores are not able to predict more than 4 branch instructions
+   in the 16 byte window.  */
+const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_CORE2 | m_GENERIC;
+const int x86_use_bt = m_ATHLON_K8;
+/* APPLE LOCAL begin */
+/* See the comment in the Darwin override options for what needs fixing.
+   Most of this code has been rewritten in mainline anyhow.  All we've done
+   here is remove the const, since we assign to these in
+   SUBTARGET_OVERRIDE_OPTIONS.  */
+/* Compare and exchange was added for 80486.  */
+int x86_cmpxchg = ~m_386;
+/* Compare and exchange 8 bytes was added for pentium.  */
+int x86_cmpxchg8b = ~(m_386 | m_486);
+/* Compare and exchange 16 bytes was added for nocona.  */
+/* APPLE LOCAL mainline */
+int x86_cmpxchg16b = m_NOCONA | m_CORE2;
+/* Exchange and add was added for 80486.  */
+int x86_xadd = ~m_386;
+/* APPLE LOCAL begin mainline bswap */
+/* Byteswap was added for 80486.  */
+int x86_bswap = ~m_386;
+/* APPLE LOCAL end mainline bswap */
+/* APPLE LOCAL end */
+const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
+/* APPLE LOCAL end mainline */
+
+/* If the average insn count for a single function invocation is lower
+   than this constant, emit fast (but longer) prologue and epilogue
+   code.  */
+#define FAST_PROLOGUE_INSN_COUNT 20
+
+/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
+static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
+static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
+static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
+
+/* Array of the smallest class containing reg number REGNO, indexed by
+   REGNO.  Used by REGNO_REG_CLASS in i386.h.
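+   For example, with the map below, REGNO_REG_CLASS (1) is DREG (%edx has
+   a class of its own), while REGNO_REG_CLASS (7) is NON_Q_REGS, since
+   %esp has no addressable QImode low part.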
+   */
+
+enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
+{
+  /* ax, dx, cx, bx */
+  AREG, DREG, CREG, BREG,
+  /* si, di, bp, sp */
+  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
+  /* FP registers */
+  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
+  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
+  /* arg pointer */
+  NON_Q_REGS,
+  /* flags, fpsr, dirflag, frame */
+  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
+  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+  SSE_REGS, SSE_REGS,
+  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
+  MMX_REGS, MMX_REGS,
+  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+  SSE_REGS, SSE_REGS,
+};
+
+/* The "default" register map used in 32bit mode.  */
+
+int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
+  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
+  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
+  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
+  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
+  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
+  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
+};
+
+static int const x86_64_int_parameter_registers[6] =
+{
+  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
+  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
+};
+
+static int const x86_64_int_return_registers[4] =
+{
+  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
+};
+
+/* The "default" register map used in 64bit mode.  */
+int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
+{
+  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
+  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
+  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
+  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
+  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
+  8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
+  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
+};
+
+/* Define the register numbers to be used in Dwarf debugging information.
+   The SVR4 reference port C compiler uses the following register numbers
+   in its Dwarf output code:
+	0 for %eax (gcc regno = 0)
+	1 for %ecx (gcc regno = 2)
+	2 for %edx (gcc regno = 1)
+	3 for %ebx (gcc regno = 3)
+	4 for %esp (gcc regno = 7)
+	5 for %ebp (gcc regno = 6)
+	6 for %esi (gcc regno = 4)
+	7 for %edi (gcc regno = 5)
+   The following three DWARF register numbers are never generated by
+   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
+   believes these numbers have these meanings.
+	8 for %eip (no gcc equivalent)
+	9 for %eflags (gcc regno = 17)
+	10 for %trapno (no gcc equivalent)
+   It is not at all clear how we should number the FP stack registers
+   for the x86 architecture.  If the version of SDB on x86/svr4 were
+   a bit less brain dead with respect to floating-point then we would
+   have a precedent to follow with respect to DWARF register numbers
+   for x86 FP registers, but the SDB on x86/svr4 is so completely
+   broken with respect to FP registers that it is hardly worth thinking
+   of it as something to strive for compatibility with.
+   The version of x86/svr4 SDB I have at the moment does (partially)
+   seem to believe that DWARF register number 11 is associated with
+   the x86 register %st(0), but that's about all.
Higher DWARF + register numbers don't seem to be associated with anything in + particular, and even for DWARF regno 11, SDB only seems to under- + stand that it should say that a variable lives in %st(0) (when + asked via an `=' command) if we said it was in DWARF regno 11, + but SDB still prints garbage when asked for the value of the + variable in question (via a `/' command). + (Also note that the labels SDB prints for various FP stack regs + when doing an `x' command are all wrong.) + Note that these problems generally don't affect the native SVR4 + C compiler because it doesn't allow the use of -O with -g and + because when it is *not* optimizing, it allocates a memory + location for each floating-point variable, and the memory + location is what gets described in the DWARF AT_location + attribute for the variable in question. + Regardless of the severe mental illness of the x86/svr4 SDB, we + do something sensible here and we use the following DWARF + register numbers. Note that these are all stack-top-relative + numbers. + 11 for %st(0) (gcc regno = 8) + 12 for %st(1) (gcc regno = 9) + 13 for %st(2) (gcc regno = 10) + 14 for %st(3) (gcc regno = 11) + 15 for %st(4) (gcc regno = 12) + 16 for %st(5) (gcc regno = 13) + 17 for %st(6) (gcc regno = 14) + 18 for %st(7) (gcc regno = 15) +*/ +int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = +{ + 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ + 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ + -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ + 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ + 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ + -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ + -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ +}; + +/* Test and compare insns in i386.md store the information needed to + generate branch and scc insns here. */ + +rtx ix86_compare_op0 = NULL_RTX; +rtx ix86_compare_op1 = NULL_RTX; +rtx ix86_compare_emitted = NULL_RTX; + +/* Size of the register save area. */ +#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) + +/* Define the structure for the machine field in struct function. */ + +struct stack_local_entry GTY(()) +{ + unsigned short mode; + unsigned short n; + rtx rtl; + struct stack_local_entry *next; +}; + +/* Structure describing stack frame layout. + Stack grows downward: + + [arguments] + <- ARG_POINTER + saved pc + + saved frame pointer if frame_pointer_needed + <- HARD_FRAME_POINTER + [saved regs] + + [padding1] \ + ) + [va_arg registers] ( + > to_allocate <- FRAME_POINTER + [frame] ( + ) + [padding2] / + */ +struct ix86_frame +{ + int nregs; + int padding1; + int va_arg_size; + HOST_WIDE_INT frame; + int padding2; + int outgoing_arguments_size; + int red_zone_size; + + HOST_WIDE_INT to_allocate; + /* The offsets relative to ARG_POINTER. */ + HOST_WIDE_INT frame_pointer_offset; + HOST_WIDE_INT hard_frame_pointer_offset; + HOST_WIDE_INT stack_pointer_offset; + + /* When save_regs_using_mov is set, emit prologue using + move instead of push instructions. */ + bool save_regs_using_mov; +}; + +/* Code model option. */ +enum cmodel ix86_cmodel; +/* Asm dialect. */ +enum asm_dialect ix86_asm_dialect = ASM_ATT; +/* TLS dialects. */ +enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; + +/* Which unit we are generating floating point math for. */ +enum fpmath_unit ix86_fpmath; + +/* Which cpu are we scheduling for. */ +enum processor_type ix86_tune; +/* Which instruction set architecture to use. 
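+   For example, "-march=nocona -mtune=core2" leaves ix86_arch ==
+   PROCESSOR_NOCONA (selecting the available instruction set) and
+   ix86_tune == PROCESSOR_CORE2 (selecting costs and scheduling); see
+   override_options below.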
*/ +enum processor_type ix86_arch; + +/* true if sse prefetch instruction is not NOOP. */ +int x86_prefetch_sse; + +/* ix86_regparm_string as a number */ +static int ix86_regparm; + +/* APPLE LOCAL begin 5612787 mainline sse4 */ +/* True if SSE population count insn supported. */ +int x86_popcnt; +/* APPLE LOCAL end 5612787 mainline sse4 */ + +/* -mstackrealign option */ +extern int ix86_force_align_arg_pointer; +static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; + +/* Preferred alignment for stack boundary in bits. */ +unsigned int ix86_preferred_stack_boundary; +/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */ +unsigned int ix86_save_preferred_stack_boundary; +/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */ + +/* Values 1-5: see jump.c */ +int ix86_branch_cost; + +/* Variables which are this size or smaller are put in the data/bss + or ldata/lbss sections. */ + +int ix86_section_threshold = 65536; + +/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ +char internal_label_prefix[16]; +int internal_label_prefix_len; + +static bool ix86_handle_option (size_t, const char *, int); +static void output_pic_addr_const (FILE *, rtx, int); +static void put_condition_code (enum rtx_code, enum machine_mode, + int, int, FILE *); +static const char *get_some_local_dynamic_name (void); +static int get_some_local_dynamic_name_1 (rtx *, void *); +static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); +static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, + rtx *); +static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); +static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, + enum machine_mode); +static rtx get_thread_pointer (int); +static rtx legitimize_tls_address (rtx, enum tls_model, int); +static void get_pc_thunk_name (char [32], unsigned int); +static rtx gen_push (rtx); +static int ix86_flags_dependent (rtx, rtx, enum attr_type); +static int ix86_agi_dependent (rtx, rtx, enum attr_type); +static struct machine_function * ix86_init_machine_status (void); +static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); +static int ix86_nsaved_regs (void); +static void ix86_emit_save_regs (void); +static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); +static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int); +static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); +static HOST_WIDE_INT ix86_GOT_alias_set (void); +static void ix86_adjust_counter (rtx, HOST_WIDE_INT); +static rtx ix86_expand_aligntest (rtx, int); +static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); +static int ix86_issue_rate (void); +static int ix86_adjust_cost (rtx, rtx, rtx, int); +static int ia32_multipass_dfa_lookahead (void); +static void ix86_init_mmx_sse_builtins (void); +static rtx x86_this_parameter (tree); +static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree); +static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); +static void x86_file_start (void); +static void ix86_reorg (void); +static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); +static tree ix86_build_builtin_va_list (void); +static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, + tree, int *, int); +static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); +static bool ix86_scalar_mode_supported_p (enum machine_mode); +static bool ix86_vector_mode_supported_p (enum machine_mode); + +static 
int ix86_address_cost (rtx); +static bool ix86_cannot_force_const_mem (rtx); +static rtx ix86_delegitimize_address (rtx); + +static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; + +struct builtin_description; +static rtx ix86_expand_sse_comi (const struct builtin_description *, + tree, rtx); +static rtx ix86_expand_sse_compare (const struct builtin_description *, + tree, rtx); +static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); +static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); +static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); +static rtx ix86_expand_store_builtin (enum insn_code, tree); +static rtx safe_vector_operand (rtx, enum machine_mode); +static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); +static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); +static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); +static int ix86_fp_comparison_sahf_cost (enum rtx_code code); +static int ix86_fp_comparison_cost (enum rtx_code code); +static unsigned int ix86_select_alt_pic_regnum (void); +static int ix86_save_reg (unsigned int, int); +static void ix86_compute_frame_layout (struct ix86_frame *); +static int ix86_comp_type_attributes (tree, tree); +static int ix86_function_regparm (tree, tree); +const struct attribute_spec ix86_attribute_table[]; +static bool ix86_function_ok_for_sibcall (tree, tree); +static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *); +static int ix86_value_regno (enum machine_mode, tree, tree); +static bool contains_128bit_aligned_vector_p (tree); +static rtx ix86_struct_value_rtx (tree, int); +static bool ix86_ms_bitfield_layout_p (tree); +static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); +static int extended_reg_mentioned_1 (rtx *, void *); +static bool ix86_rtx_costs (rtx, int, int, int *); +static int min_insn_size (rtx); +static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers); +static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type); +static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static void ix86_init_builtins (void); +static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +/* APPLE LOCAL mangle_type 7105099 */ +static const char *ix86_mangle_type (tree); +static tree ix86_stack_protect_fail (void); +static rtx ix86_internal_arg_pointer (void); +static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); + +/* This function is only used on Solaris. */ +static void i386_solaris_elf_named_section (const char *, unsigned int, tree) + ATTRIBUTE_UNUSED; + +/* Register class used for passing given 64bit part of the argument. + These represent classes as documented by the PS ABI, with the exception + of SSESF, SSEDF classes, that are basically SSE class, just gcc will + use SF or DFmode move instead of DImode to avoid reformatting penalties. + + Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves + whenever possible (upper half does contain padding). 
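+
+   A hypothetical classification, for illustration only: a structure
+   "struct { double d; int i; }" occupies two eightbytes; the first would
+   be classified X86_64_SSEDF_CLASS (passed in an SSE register, moved as
+   DFmode) and the second X86_64_INTEGERSI_CLASS (an integer register,
+   moved in SImode since its upper half is padding).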
+ */ +enum x86_64_reg_class + { + X86_64_NO_CLASS, + X86_64_INTEGER_CLASS, + X86_64_INTEGERSI_CLASS, + X86_64_SSE_CLASS, + X86_64_SSESF_CLASS, + X86_64_SSEDF_CLASS, + X86_64_SSEUP_CLASS, + X86_64_X87_CLASS, + X86_64_X87UP_CLASS, + X86_64_COMPLEX_X87_CLASS, + X86_64_MEMORY_CLASS + }; +static const char * const x86_64_reg_class_name[] = { + "no", "integer", "integerSI", "sse", "sseSF", "sseDF", + "sseup", "x87", "x87up", "cplx87", "no" +}; + +#define MAX_CLASSES 4 + +/* Table of constants used by fldpi, fldln2, etc.... */ +static REAL_VALUE_TYPE ext_80387_constants_table [5]; +static bool ext_80387_constants_init = 0; +static void init_ext_80387_constants (void); +static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED; +static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; +static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED; +static section *x86_64_elf_select_section (tree decl, int reloc, + unsigned HOST_WIDE_INT align) + ATTRIBUTE_UNUSED; + +/* Initialize the GCC target structure. */ +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +# undef TARGET_MERGE_DECL_ATTRIBUTES +# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes +#endif + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS ix86_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN ix86_expand_builtin + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue + +#undef TARGET_ENCODE_SECTION_INFO +#ifndef SUBTARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info +#else +#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO +#endif + +#undef TARGET_ASM_OPEN_PAREN +#define TARGET_ASM_OPEN_PAREN "" +#undef TARGET_ASM_CLOSE_PAREN +#define TARGET_ASM_CLOSE_PAREN "" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG +#ifdef ASM_QUAD +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD +#endif + +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + ia32_multipass_dfa_lookahead + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address + +#undef TARGET_MS_BITFIELD_LAYOUT_P +#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p + +#if TARGET_MACHO +#undef TARGET_BINDS_LOCAL_P +#define TARGET_BINDS_LOCAL_P 
darwin_binds_local_p +#endif + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START x86_file_start + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS \ + (TARGET_DEFAULT \ + | TARGET_64BIT_DEFAULT \ + | TARGET_SUBTARGET_DEFAULT \ + | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) + +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION ix86_handle_option + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS ix86_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST ix86_address_cost + +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs +#undef TARGET_CC_MODES_COMPATIBLE +#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list + +#undef TARGET_MD_ASM_CLOBBERS +#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference +#undef TARGET_INTERNAL_ARG_POINTER +#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer +#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC +#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p + +#ifdef HAVE_AS_TLS +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel +#endif + +#ifdef SUBTARGET_INSERT_ATTRIBUTES +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES +#endif + +/* APPLE LOCAL begin mangle_type 7105099 */ +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE ix86_mangle_type +/* APPLE LOCAL end mangle_type 7105099 */ + +#undef TARGET_STACK_PROTECT_FAIL +#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE ix86_function_value + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/* The svr4 ABI for the i386 says that records and unions are returned + in memory. */ +#ifndef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 1 +#endif + +/* Implement TARGET_HANDLE_OPTION. 
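+   The only work done here is keeping implied ISA masks consistent when a
+   feature is explicitly disabled; e.g. "-msse -mno-sse2" must also clear
+   MASK_SSE3, and the cleared bits are recorded in target_flags_explicit
+   so that override_options will not silently turn them back on.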
*/ + +static bool +ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) +{ + switch (code) + { + case OPT_m3dnow: + if (!value) + { + target_flags &= ~MASK_3DNOW_A; + target_flags_explicit |= MASK_3DNOW_A; + } + return true; + + case OPT_mmmx: + if (!value) + { + target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A); + target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A; + } + return true; + + case OPT_msse: + if (!value) + { + target_flags &= ~(MASK_SSE2 | MASK_SSE3); + target_flags_explicit |= MASK_SSE2 | MASK_SSE3; + } + return true; + + case OPT_msse2: + if (!value) + { + target_flags &= ~MASK_SSE3; + target_flags_explicit |= MASK_SSE3; + } + return true; + + default: + return true; + } +} + +/* APPLE LOCAL begin 4760857 optimization pragmas. */ +/* Hoisted so it can be used by reset_optimization_options. */ +static struct ptt + { + const struct processor_costs *cost; /* Processor costs */ + const int target_enable; /* Target flags to enable. */ + const int target_disable; /* Target flags to disable. */ + const int align_loop; /* Default alignments. */ + const int align_loop_max_skip; + const int align_jump; + const int align_jump_max_skip; + const int align_func; + } +const processor_target_table[PROCESSOR_max] = + { + {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, + {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, + {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, + {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, + {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, + {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, + {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, + {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, + {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, + /* APPLE LOCAL mainline */ + {&core2_cost, 0, 0, 16, 7, 16, 7, 16}, + {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, + {&generic64_cost, 0, 0, 16, 7, 16, 7, 16} + }; +/* APPLE LOCAL end 4760857 optimization pragmas. */ + +/* Sometimes certain combinations of command options do not make + sense on a particular target machine. You can define a macro + `OVERRIDE_OPTIONS' to take account of this. This macro, if + defined, is executed once just after all the command options have + been parsed. + + Don't use this macro to turn on various extra optimizations for + `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ + +void +override_options (void) +{ + int i; + int ix86_tune_defaulted = 0; + /* APPLE LOCAL mainline */ + int ix86_arch_specified = 0; + + /* Comes from final.c -- no real reason to change it. */ +#define MAX_CODE_ALIGN 16 + + /* APPLE LOCAL begin 4760857 optimization pragmas. */ + /* processor_target_table moved to file scope. */ + /* APPLE LOCAL end 4760857 optimization pragmas. */ + + static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; + static struct pta + { + const char *const name; /* processor name or nickname. 
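+				   (e.g. "k8", with "opteron", "athlon64"
+				   and "athlon-fx" below being nicknames
+				   for the same processor/flag entry).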
*/ + const enum processor_type processor; + const enum pta_flags + { + PTA_SSE = 1, + PTA_SSE2 = 2, + PTA_SSE3 = 4, + PTA_MMX = 8, + PTA_PREFETCH_SSE = 16, + PTA_3DNOW = 32, + PTA_3DNOW_A = 64, + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* APPLE LOCAL begin mainline */ + PTA_64BIT = 128, + PTA_SSSE3 = 256, + /* APPLE LOCAL end mainline */ + PTA_CX16 = 1 << 9, + PTA_POPCNT = 1 << 10, + PTA_ABM = 1 << 11, + PTA_SSE4A = 1 << 12, + PTA_NO_SAHF = 1 << 13, + PTA_SSE4_1 = 1 << 14, + PTA_SSE4_2 = 1 << 15 + /* APPLE LOCAL end 5612787 mainline sse4 */ + } flags; + } + const processor_alias_table[] = + { + {"i386", PROCESSOR_I386, 0}, + {"i486", PROCESSOR_I486, 0}, + {"i586", PROCESSOR_PENTIUM, 0}, + {"pentium", PROCESSOR_PENTIUM, 0}, + {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, + {"winchip-c6", PROCESSOR_I486, PTA_MMX}, + {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, + {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, + {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, + {"i686", PROCESSOR_PENTIUMPRO, 0}, + {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, + {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, + {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, + {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, + {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, + {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 + | PTA_MMX | PTA_PREFETCH_SSE}, + {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 + | PTA_MMX | PTA_PREFETCH_SSE}, + {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_MMX | PTA_PREFETCH_SSE}, + {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT + | PTA_MMX | PTA_PREFETCH_SSE}, + /* APPLE LOCAL begin mainline */ + {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_64BIT | PTA_MMX + | PTA_PREFETCH_SSE}, + /* APPLE LOCAL end mainline */ + {"k6", PROCESSOR_K6, PTA_MMX}, + {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, + {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, + {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW + | PTA_3DNOW_A}, + {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE + | PTA_3DNOW | PTA_3DNOW_A}, + {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW + | PTA_3DNOW_A | PTA_SSE}, + {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW + | PTA_3DNOW_A | PTA_SSE}, + {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW + | PTA_3DNOW_A | PTA_SSE}, + {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT + | PTA_SSE | PTA_SSE2 }, + {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT + | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, + {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT + | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, + {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT + | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, + {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT + | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, + {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, + {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, + }; + + int const pta_size = ARRAY_SIZE (processor_alias_table); + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + +#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS + SUBSUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* -fPIC is the default for x86_64. 
*/ + if (TARGET_MACHO && TARGET_64BIT) + flag_pic = 2; + + /* Set the default values for switches whose default depends on TARGET_64BIT + in case they weren't overwritten by command line options. */ + if (TARGET_64BIT) + { + /* Mach-O doesn't support omitting the frame pointer for now. */ + if (flag_omit_frame_pointer == 2) + flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); + if (flag_asynchronous_unwind_tables == 2) + flag_asynchronous_unwind_tables = 1; + if (flag_pcc_struct_return == 2) + flag_pcc_struct_return = 0; + } + else + { + if (flag_omit_frame_pointer == 2) + flag_omit_frame_pointer = 0; + if (flag_asynchronous_unwind_tables == 2) + flag_asynchronous_unwind_tables = 0; + if (flag_pcc_struct_return == 2) + flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; + } + + /* Need to check -mtune=generic first. */ + if (ix86_tune_string) + { + if (!strcmp (ix86_tune_string, "generic") + || !strcmp (ix86_tune_string, "i686") + /* As special support for cross compilers we read -mtune=native + as -mtune=generic. With native compilers we won't see the + -mtune=native, as it was changed by the driver. */ + || !strcmp (ix86_tune_string, "native")) + { + if (TARGET_64BIT) + ix86_tune_string = "generic64"; + else + ix86_tune_string = "generic32"; + } + else if (!strncmp (ix86_tune_string, "generic", 7)) + error ("bad value (%s) for -mtune= switch", ix86_tune_string); + } + else + { + if (ix86_arch_string) + ix86_tune_string = ix86_arch_string; + if (!ix86_tune_string) + { + ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; + ix86_tune_defaulted = 1; + } + + /* ix86_tune_string is set to ix86_arch_string or defaulted. We + need to use a sensible tune option. */ + if (!strcmp (ix86_tune_string, "generic") + || !strcmp (ix86_tune_string, "x86-64") + || !strcmp (ix86_tune_string, "i686")) + { + if (TARGET_64BIT) + ix86_tune_string = "generic64"; + else + ix86_tune_string = "generic32"; + } + } + if (!strcmp (ix86_tune_string, "x86-64")) + warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " + "-mtune=generic instead as appropriate."); + + if (!ix86_arch_string) + ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; + /* APPLE LOCAL begin mainline */ + else + ix86_arch_specified = 1; + /* APPLE LOCAL end mainline */ + if (!strcmp (ix86_arch_string, "generic")) + error ("generic CPU can be used only for -mtune= switch"); + if (!strncmp (ix86_arch_string, "generic", 7)) + error ("bad value (%s) for -march= switch", ix86_arch_string); + + if (ix86_cmodel_string != 0) + { + if (!strcmp (ix86_cmodel_string, "small")) + ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; + else if (!strcmp (ix86_cmodel_string, "medium")) + ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM; + else if (flag_pic) + sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); + else if (!strcmp (ix86_cmodel_string, "32")) + ix86_cmodel = CM_32; + else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) + ix86_cmodel = CM_KERNEL; + else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) + ix86_cmodel = CM_LARGE; + else + error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); + } + else + { + ix86_cmodel = CM_32; + if (TARGET_64BIT) + ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; + } + if (ix86_asm_string != 0) + { + if (! 
TARGET_MACHO + && !strcmp (ix86_asm_string, "intel")) + ix86_asm_dialect = ASM_INTEL; + else if (!strcmp (ix86_asm_string, "att")) + ix86_asm_dialect = ASM_ATT; + else + error ("bad value (%s) for -masm= switch", ix86_asm_string); + } + if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) + error ("code model %qs not supported in the %s bit mode", + ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); + if (ix86_cmodel == CM_LARGE) + sorry ("code model %<large%> not supported yet"); + if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) + sorry ("%i-bit mode not compiled in", + (target_flags & MASK_64BIT) ? 64 : 32); + + for (i = 0; i < pta_size; i++) + if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) + { + ix86_arch = processor_alias_table[i].processor; + /* Default cpu tuning to the architecture. */ + ix86_tune = ix86_arch; + if (processor_alias_table[i].flags & PTA_MMX + && !(target_flags_explicit & MASK_MMX)) + target_flags |= MASK_MMX; + if (processor_alias_table[i].flags & PTA_3DNOW + && !(target_flags_explicit & MASK_3DNOW)) + target_flags |= MASK_3DNOW; + if (processor_alias_table[i].flags & PTA_3DNOW_A + && !(target_flags_explicit & MASK_3DNOW_A)) + target_flags |= MASK_3DNOW_A; + if (processor_alias_table[i].flags & PTA_SSE + && !(target_flags_explicit & MASK_SSE)) + target_flags |= MASK_SSE; + if (processor_alias_table[i].flags & PTA_SSE2 + && !(target_flags_explicit & MASK_SSE2)) + target_flags |= MASK_SSE2; + if (processor_alias_table[i].flags & PTA_SSE3 + && !(target_flags_explicit & MASK_SSE3)) + target_flags |= MASK_SSE3; + /* APPLE LOCAL begin mainline */ + if (processor_alias_table[i].flags & PTA_SSSE3 + && !(target_flags_explicit & MASK_SSSE3)) + target_flags |= MASK_SSSE3; + /* APPLE LOCAL end mainline */ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + if (processor_alias_table[i].flags & PTA_SSE4_1 + && !(target_flags_explicit & MASK_SSE4_1)) + target_flags |= MASK_SSE4_1; + if (processor_alias_table[i].flags & PTA_SSE4_2 + && !(target_flags_explicit & MASK_SSE4_2)) + target_flags |= MASK_SSE4_2; + if (processor_alias_table[i].flags & PTA_SSE4A + && !(target_flags_explicit & MASK_SSE4A)) + target_flags |= MASK_SSE4A; + /* APPLE LOCAL end 5612787 mainline sse4 */ + if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) + x86_prefetch_sse = true; + if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) + error ("CPU you selected does not support x86-64 " + "instruction set"); + break; + } + + if (i == pta_size) + error ("bad value (%s) for -march= switch", ix86_arch_string); + + for (i = 0; i < pta_size; i++) + if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) + { + ix86_tune = processor_alias_table[i].processor; + if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) + { + if (ix86_tune_defaulted) + { + ix86_tune_string = "x86-64"; + for (i = 0; i < pta_size; i++) + if (! strcmp (ix86_tune_string, + processor_alias_table[i].name)) + break; + ix86_tune = processor_alias_table[i].processor; + } + else + error ("CPU you selected does not support x86-64 " + "instruction set"); + } + /* Intel CPUs have always interpreted SSE prefetch instructions as + NOPs; so, we can enable SSE prefetch instructions even when + -mtune (rather than -march) points us to a processor that has them. + However, the VIA C3 gives a SIGILL, so we only do that for i686 and + higher processors. 
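+     (TARGET_CMOVE serves as the "i686 or higher" test here: cmov was
+     introduced with the PPro, and the C3 is modelled as PROCESSOR_I486 in
+     the alias table above, so it fails the test.)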
     */
+	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
+	  x86_prefetch_sse = true;
+	break;
+      }
+  if (i == pta_size)
+    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+
+  if (optimize_size)
+    ix86_cost = &size_cost;
+  else
+    ix86_cost = processor_target_table[ix86_tune].cost;
+  target_flags |= processor_target_table[ix86_tune].target_enable;
+  target_flags &= ~processor_target_table[ix86_tune].target_disable;
+
+  /* Arrange to set up i386_stack_locals for all functions.  */
+  init_machine_status = ix86_init_machine_status;
+
+  /* Validate -mregparm= value.  */
+  if (ix86_regparm_string)
+    {
+      i = atoi (ix86_regparm_string);
+      if (i < 0 || i > REGPARM_MAX)
+	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
+      else
+	ix86_regparm = i;
+    }
+  else
+    if (TARGET_64BIT)
+      ix86_regparm = REGPARM_MAX;
+
+  /* If the user has provided any of the -malign-* options,
+     warn and use that value only if -falign-* is not set.
+     Remove this code in GCC 3.2 or later.  */
+  if (ix86_align_loops_string)
+    {
+      warning (0, "-malign-loops is obsolete, use -falign-loops");
+      if (align_loops == 0)
+	{
+	  i = atoi (ix86_align_loops_string);
+	  if (i < 0 || i > MAX_CODE_ALIGN)
+	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+	  else
+	    align_loops = 1 << i;
+	}
+    }
+
+  if (ix86_align_jumps_string)
+    {
+      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
+      if (align_jumps == 0)
+	{
+	  i = atoi (ix86_align_jumps_string);
+	  if (i < 0 || i > MAX_CODE_ALIGN)
+	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+	  else
+	    align_jumps = 1 << i;
+	}
+    }
+
+  if (ix86_align_funcs_string)
+    {
+      warning (0, "-malign-functions is obsolete, use -falign-functions");
+      if (align_functions == 0)
+	{
+	  i = atoi (ix86_align_funcs_string);
+	  if (i < 0 || i > MAX_CODE_ALIGN)
+	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+	  else
+	    align_functions = 1 << i;
+	}
+    }
+
+  /* Default align_* from the processor table.  */
+  if (align_loops == 0)
+    {
+      align_loops = processor_target_table[ix86_tune].align_loop;
+      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+    }
+  if (align_jumps == 0)
+    {
+      align_jumps = processor_target_table[ix86_tune].align_jump;
+      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+    }
+  if (align_functions == 0)
+    {
+      align_functions = processor_target_table[ix86_tune].align_func;
+    }
+
+  /* Validate -mbranch-cost= value, or provide default.
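+     For example, "-mbranch-cost=5" makes branches look expensive relative
+     to straight-line code, biasing passes that consult BRANCH_COST (such
+     as if-conversion) toward branch-free sequences; values outside 0..5
+     are rejected below.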
     */
+  ix86_branch_cost = ix86_cost->branch_cost;
+  if (ix86_branch_cost_string)
+    {
+      i = atoi (ix86_branch_cost_string);
+      if (i < 0 || i > 5)
+	error ("-mbranch-cost=%d is not between 0 and 5", i);
+      else
+	ix86_branch_cost = i;
+    }
+  if (ix86_section_threshold_string)
+    {
+      i = atoi (ix86_section_threshold_string);
+      if (i < 0)
+	error ("-mlarge-data-threshold=%d is negative", i);
+      else
+	ix86_section_threshold = i;
+    }
+
+  if (ix86_tls_dialect_string)
+    {
+      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
+	ix86_tls_dialect = TLS_DIALECT_GNU;
+      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+	ix86_tls_dialect = TLS_DIALECT_GNU2;
+      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
+	ix86_tls_dialect = TLS_DIALECT_SUN;
+      else
+	error ("bad value (%s) for -mtls-dialect= switch",
+	       ix86_tls_dialect_string);
+    }
+  /* APPLE LOCAL begin mainline */
+  if (TARGET_64BIT)
+    {
+      if (TARGET_ALIGN_DOUBLE)
+	error ("-malign-double makes no sense in the 64bit mode");
+      if (TARGET_RTD)
+	error ("-mrtd calling convention not supported in the 64bit mode");
+      /* APPLE LOCAL begin radar 4877693 */
+      if (ix86_force_align_arg_pointer)
+	error ("-mstackrealign not supported in the 64bit mode");
+      /* APPLE LOCAL end radar 4877693 */
+
+      /* Enable the SSE and MMX builtins by default.  Do allow the user to
+	 explicitly disable any of these.  In particular, disabling SSE and
+	 MMX for kernel code is extremely useful.  */
+      if (!ix86_arch_specified)
+	target_flags
+	  |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE
+	       | TARGET_SUBTARGET64_DEFAULT) & ~target_flags_explicit);
+      /* APPLE LOCAL begin mainline candidate */
+      /* Disable the red zone for kernel compilation.
+	 ??? Why aren't we using -mcmodel=kernel?  */
+      if (TARGET_MACHO
+	  && (flag_mkernel || flag_apple_kext))
+	target_flags |= MASK_NO_RED_ZONE;
+      /* APPLE LOCAL end mainline candidate */
+    }
+  else
+    {
+      if (!ix86_arch_specified)
+	target_flags |= (TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit);
+
+      /* The i386 ABI does not specify a red zone.  It still makes sense
+	 to use one when the programmer takes care to keep the stack from
+	 being destroyed.  */
+      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
+	target_flags |= MASK_NO_RED_ZONE;
+    }
+
+  /* APPLE LOCAL end mainline */
+  /* Keep nonleaf frame pointers.  */
+  if (flag_omit_frame_pointer)
+    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
+    flag_omit_frame_pointer = 1;
+
+  /* If we're doing fast math, we don't care about comparison order
+     wrt NaNs.  This lets us use a shorter comparison sequence.  */
+  if (flag_finite_math_only)
+    target_flags &= ~MASK_IEEE_FP;
+
+  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
+     since the insns won't need emulation.  */
+  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
+    target_flags &= ~MASK_NO_FANCY_MATH_387;
+
+  /* Likewise, if the target doesn't have a 387, or we've specified
+     software floating point, don't use 387 inline intrinsics.  */
+  if (!TARGET_80387)
+    target_flags |= MASK_NO_FANCY_MATH_387;
+  /* APPLE LOCAL begin 5612787 mainline sse4 */
+  /* Turn on SSE4.1 builtins for -msse4.2.  */
+  if (TARGET_SSE4_2)
+    target_flags |= MASK_SSE4_1;
+  /* Turn on SSSE3 builtins for -msse4.1.  */
+  if (TARGET_SSE4_1)
+    target_flags |= MASK_SSSE3;
+  /* Turn on SSE3 builtins for -msse4a.  */
+  if (TARGET_SSE4A)
+    target_flags |= MASK_SSE3;
+  /* APPLE LOCAL end 5612787 mainline sse4 */
+  /* APPLE LOCAL begin mainline */
+  /* Turn on SSE3 builtins for -mssse3.
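+     Together with the neighbouring tests, these ISA implications form a
+     chain: SSE4.2 -> SSE4.1 -> SSSE3 -> SSE3 -> SSE2 -> SSE -> MMX (and
+     SSE4A -> SSE3), so e.g. "-msse4.1" alone enables everything from
+     SSSE3 down.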
*/ + if (TARGET_SSSE3) + target_flags |= MASK_SSE3; + /* APPLE LOCAL end mainline */ + /* Turn on SSE2 builtins for -msse3. */ + if (TARGET_SSE3) + target_flags |= MASK_SSE2; + + /* Turn on SSE builtins for -msse2. */ + if (TARGET_SSE2) + target_flags |= MASK_SSE; + + /* Turn on MMX builtins for -msse. */ + if (TARGET_SSE) + { + target_flags |= MASK_MMX & ~target_flags_explicit; + x86_prefetch_sse = true; + } + + /* Turn on MMX builtins for 3Dnow. */ + if (TARGET_3DNOW) + target_flags |= MASK_MMX; + + /* APPLE LOCAL mainline */ + /* Moved this up... */ + /* Validate -mpreferred-stack-boundary= value, or provide default. + The default of 128 bits is for Pentium III's SSE __m128. We can't + change it because of optimize_size. Otherwise, we can't mix object + files compiled with -Os and -On. */ + ix86_preferred_stack_boundary = 128; + if (ix86_preferred_stack_boundary_string) + { + i = atoi (ix86_preferred_stack_boundary_string); + if (i < (TARGET_64BIT ? 4 : 2) || i > 12) + error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, + TARGET_64BIT ? 4 : 2); + else + ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; + } + + /* Accept -msseregparm only if at least SSE support is enabled. */ + if (TARGET_SSEREGPARM + && ! TARGET_SSE) + error ("-msseregparm used without SSE enabled"); + + ix86_fpmath = TARGET_FPMATH_DEFAULT; + + if (ix86_fpmath_string != 0) + { + if (! strcmp (ix86_fpmath_string, "387")) + ix86_fpmath = FPMATH_387; + else if (! strcmp (ix86_fpmath_string, "sse")) + { + if (!TARGET_SSE) + { + warning (0, "SSE instruction set disabled, using 387 arithmetics"); + ix86_fpmath = FPMATH_387; + } + else + ix86_fpmath = FPMATH_SSE; + } + else if (! strcmp (ix86_fpmath_string, "387,sse") + || ! strcmp (ix86_fpmath_string, "sse,387")) + { + if (!TARGET_SSE) + { + warning (0, "SSE instruction set disabled, using 387 arithmetics"); + ix86_fpmath = FPMATH_387; + } + else if (!TARGET_80387) + { + warning (0, "387 instruction set disabled, using SSE arithmetics"); + ix86_fpmath = FPMATH_SSE; + } + else + ix86_fpmath = FPMATH_SSE | FPMATH_387; + } + else + error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); + } + + /* If the i387 is disabled, then do not return values in it. */ + if (!TARGET_80387) + target_flags &= ~MASK_FLOAT_RETURNS; + + if ((x86_accumulate_outgoing_args & TUNEMASK) + && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) + && !optimize_size) + target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + + /* ??? Unwind info is not correct around the CFG unless either a frame + pointer is present or M_A_O_A is set. Fixing this requires rewriting + unwind info generation to be aware of the CFG and propagating states + around edges. */ + if ((flag_unwind_tables || flag_asynchronous_unwind_tables + || flag_exceptions || flag_non_call_exceptions) + && flag_omit_frame_pointer + && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) + { + if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) + warning (0, "unwind tables currently require either a frame pointer " + "or -maccumulate-outgoing-args for correctness"); + target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + } + + /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. 
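+     An illustration (hypothetical expansion): if the macro turns
+     ("LX", 0) into something like "*LX0", the code below finds the
+     'X', chops the string there, and records "*L" (length 2) as the
+     prefix shared by all compiler-internal labels.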
*/ + { + char *p; + ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); + p = strchr (internal_label_prefix, 'X'); + internal_label_prefix_len = p - internal_label_prefix; + *p = '\0'; + } + + /* When scheduling description is not available, disable scheduler pass + so it won't slow down the compilation and make x87 code slower. */ + /* APPLE LOCAL 5591571 */ + if (1 || !TARGET_SCHEDULE) + flag_schedule_insns_after_reload = flag_schedule_insns = 0; + + /* APPLE LOCAL begin dynamic-no-pic */ +#if TARGET_MACHO + if (MACHO_DYNAMIC_NO_PIC_P) + { + if (flag_pic) + warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC"); + flag_pic = 0; + } + else +#endif + if (flag_pic == 1) + { + /* Darwin's -fpic is -fPIC. */ + flag_pic = 2; + } + /* APPLE LOCAL end dynamic-no-pic */ + /* APPLE LOCAL begin 4812082 -fast */ + /* These flags were the best on the software H264 codec, and have therefore + been lumped into -fast per 4812082. They have not been evaluated on + any other code, except that -fno-tree-pre is known to lose on the + hardware accelerated version of the codec. */ + if (flag_fast || flag_fastf || flag_fastcp) + { + flag_omit_frame_pointer = 1; + flag_strict_aliasing = 1; + flag_tree_pre = 0; + target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; + align_loops = processor_target_table[ix86_tune].align_loop; + } + /* APPLE LOCAL end 4812082 -fast */ +} + +/* switch to the appropriate section for output of DECL. + DECL is either a `VAR_DECL' node or a constant of some sort. + RELOC indicates whether forming the initial value of DECL requires + link-time relocations. */ + +static section * +x86_64_elf_select_section (tree decl, int reloc, + unsigned HOST_WIDE_INT align) +{ + if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) + && ix86_in_large_data_p (decl)) + { + const char *sname = NULL; + unsigned int flags = SECTION_WRITE; + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + sname = ".ldata"; + break; + case SECCAT_DATA_REL: + sname = ".ldata.rel"; + break; + case SECCAT_DATA_REL_LOCAL: + sname = ".ldata.rel.local"; + break; + case SECCAT_DATA_REL_RO: + sname = ".ldata.rel.ro"; + break; + case SECCAT_DATA_REL_RO_LOCAL: + sname = ".ldata.rel.ro.local"; + break; + case SECCAT_BSS: + sname = ".lbss"; + flags |= SECTION_BSS; + break; + case SECCAT_RODATA: + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + sname = ".lrodata"; + flags = 0; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + case SECCAT_TEXT: + case SECCAT_TDATA: + case SECCAT_TBSS: + /* We don't split these for medium model. Place them into + default sections and hope for best. */ + break; + } + if (sname) + { + /* We might get called with string constants, but get_named_section + doesn't like them as they are not DECLs. Also, we need to set + flags in that case. */ + if (!DECL_P (decl)) + return get_section (sname, flags, NULL); + return get_named_section (decl, sname, reloc); + } + } + return default_elf_select_section (decl, reloc, align); +} + +/* Build up a unique section name, expressed as a + STRING_CST node, and assign it to DECL_SECTION_NAME (decl). + RELOC indicates whether the initial value of EXP requires + link-time relocations. 
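+
+   For example (an illustrative sketch): with -mcmodel=medium,
+   -mlarge-data-threshold=65536 and -fdata-sections,
+
+     static char big_bss[1 << 20];          (placed in .lbss.big_bss)
+     static char big_data[1 << 20] = {1};   (placed in .ldata.big_data)
+
+   while objects at or below the threshold keep the ordinary .bss.
+   and .data. prefixes.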
*/ + +static void +x86_64_elf_unique_section (tree decl, int reloc) +{ + if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) + && ix86_in_large_data_p (decl)) + { + const char *prefix = NULL; + /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ + bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; + + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + prefix = one_only ? ".gnu.linkonce.ld." : ".ldata."; + break; + case SECCAT_BSS: + prefix = one_only ? ".gnu.linkonce.lb." : ".lbss."; + break; + case SECCAT_RODATA: + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata."; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + case SECCAT_TEXT: + case SECCAT_TDATA: + case SECCAT_TBSS: + /* We don't split these for medium model. Place them into + default sections and hope for best. */ + break; + } + if (prefix) + { + const char *name; + size_t nlen, plen; + char *string; + plen = strlen (prefix); + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = targetm.strip_name_encoding (name); + nlen = strlen (name); + + string = alloca (nlen + plen + 1); + memcpy (string, prefix, plen); + memcpy (string + plen, name, nlen + 1); + + DECL_SECTION_NAME (decl) = build_string (nlen + plen, string); + return; + } + } + default_unique_section (decl, reloc); +} + +#ifdef COMMON_ASM_OP +/* This says how to output assembler code to declare an + uninitialized external linkage data object. + + For medium model x86-64 we need to use .largecomm opcode for + large objects. */ +void +x86_elf_aligned_common (FILE *file, + const char *name, unsigned HOST_WIDE_INT size, + int align) +{ + if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) + && size > (unsigned int)ix86_section_threshold) + fprintf (file, ".largecomm\t"); + else + fprintf (file, "%s", COMMON_ASM_OP); + assemble_name (file, name); + fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", + size, align / BITS_PER_UNIT); +} + +/* Utility function for targets to use in implementing + ASM_OUTPUT_ALIGNED_BSS. */ + +void +x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, + const char *name, unsigned HOST_WIDE_INT size, + int align) +{ + if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) + && size > (unsigned int)ix86_section_threshold) + switch_to_section (get_named_section (decl, ".lbss", 0)); + else + switch_to_section (bss_section); + ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +#ifdef ASM_DECLARE_OBJECT_NAME + last_assemble_variable_decl = decl; + ASM_DECLARE_OBJECT_NAME (file, name, decl); +#else + /* Standard thing is just output label for the object. */ + ASM_OUTPUT_LABEL (file, name); +#endif /* ASM_DECLARE_OBJECT_NAME */ + ASM_OUTPUT_SKIP (file, size ? size : 1); +} +#endif + +void +optimization_options (int level, int size ATTRIBUTE_UNUSED) +{ + /* APPLE LOCAL begin disable strict aliasing; breaks too much existing code. */ +#if TARGET_MACHO + flag_strict_aliasing = 0; +#endif + /* APPLE LOCAL end disable strict aliasing; breaks too much existing code. */ + /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to + make the problem with not enough registers even worse. 
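+     (ia32 has only about seven allocatable integer registers, so
+     letting the first scheduling pass stretch live ranges before
+     register allocation easily turns into extra spills.)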
*/
+#ifdef INSN_SCHEDULING
+  if (level > 1)
+    flag_schedule_insns = 0;
+#endif
+
+  /* APPLE LOCAL begin pragma fenv */
+  /* Trapping math is not needed by many users, and is expensive.
+     C99 permits us to default it off and we do that.  It is
+     turned on when <fenv.h> is included (see darwin_pragma_fenv
+     in darwin-c.c).  */
+  flag_trapping_math = 0;
+  /* APPLE LOCAL end pragma fenv */
+
+  if (TARGET_MACHO)
+    /* The Darwin libraries never set errno, so we might as well
+       avoid calling them when that's the only reason we would.  */
+    flag_errno_math = 0;
+
+  /* The default values of these switches depend on TARGET_64BIT,
+     which is not known at this moment.  Mark these values with 2 and
+     let the user override them.  In case there is no command line option
+     specifying them, we will set the defaults in override_options.  */
+  if (optimize >= 1)
+    flag_omit_frame_pointer = 2;
+  flag_pcc_struct_return = 2;
+  flag_asynchronous_unwind_tables = 2;
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+  SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+  /* APPLE LOCAL begin 4200243 */
+  if (getenv ("RC_FORCE_SSE3"))
+    target_flags |= MASK_SSE3;
+}
+/* APPLE LOCAL end 4200243 */
+
+/* APPLE LOCAL begin optimization pragmas 3124235/3420242 */
+/* Version of the above for use from #pragma optimization_level.  Only
+   per-function flags are reset.  */
+#if TARGET_MACHO
+void
+reset_optimization_options (int level, int size)
+{
+  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
+     make the problem with not enough registers even worse.  */
+#ifdef INSN_SCHEDULING
+  if (level > 1)
+    flag_schedule_insns = 0;
+#endif
+
+  /* APPLE LOCAL begin pragma fenv */
+  /* Trapping math is not needed by many users, and is expensive.
+     C99 permits us to default it off and we do that.  It is
+     turned on when <fenv.h> is included (see darwin_pragma_fenv
+     in darwin-c.c).  */
+  flag_trapping_math = 0;
+  /* APPLE LOCAL end pragma fenv */
+
+  /* The default values of these switches depend on TARGET_64BIT,
+     which was set earlier and not reset.  */
+  if (optimize >= 1)
+    {
+      if (TARGET_64BIT)
+	flag_omit_frame_pointer = 1;
+      else
+	flag_omit_frame_pointer = 0;
+    }
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+  SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+  /* APPLE LOCAL begin 4760857 */
+  if (size)
+    ix86_cost = &size_cost;
+  else
+    ix86_cost = processor_target_table[ix86_tune].cost;
+
+  /* Default align_* from the processor table.  */
+  if (align_loops == 0)
+    {
+      align_loops = processor_target_table[ix86_tune].align_loop;
+      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+    }
+  if (align_jumps == 0)
+    {
+      align_jumps = processor_target_table[ix86_tune].align_jump;
+      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+    }
+  /* APPLE LOCAL end 4760857 */
+}
+#endif
+/* APPLE LOCAL end optimization pragmas 3124235/3420242 */
+
+/* Table of valid machine attributes.  */
+const struct attribute_spec ix86_attribute_table[] =
+{
+  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+  /* Stdcall attribute says callee is responsible for popping arguments
+     if they are not variable.  */
+  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
+  /* Fastcall attribute says callee is responsible for popping arguments
+     if they are not variable.
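+     For illustration (not part of the patch): given
+
+       void __attribute__((fastcall)) f (int a, int b, int c);
+
+     a and b travel in %ecx and %edx, c goes on the stack, and f
+     returns with `ret $4' to pop it.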
*/ + { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, + /* Cdecl attribute says the callee is a normal C declaration */ + { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, + /* Regparm attribute specifies how many integer arguments are to be + passed in registers. */ + { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, + /* APPLE LOCAL begin regparmandstackparm */ + /* regparmandstackparm means two entry points; a traditional stack-based + one, and another, with a mangled name, that employs regparm and + sseregparm. */ + { "regparmandstackparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, + { "regparmandstackparmee", 0, 0, false, true, true, ix86_handle_cconv_attribute }, + /* APPLE LOCAL end regparmandstackparm */ + /* Sseregparm attribute says we are using x86_64 calling conventions + for FP arguments. */ + { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, + /* force_align_arg_pointer says this function realigns the stack at entry. */ + { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, + false, true, true, ix86_handle_cconv_attribute }, +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES + { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, + { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, + { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, +#endif + { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, + { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, +#ifdef SUBTARGET_ATTRIBUTE_TABLE + SUBTARGET_ATTRIBUTE_TABLE, +#endif + { NULL, 0, 0, false, false, false, NULL } +}; + +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +ix86_function_ok_for_sibcall (tree decl, tree exp) +{ + tree func; + rtx a, b; + + /* APPLE LOCAL begin indirect sibcall 4087330 */ + /* If we are generating position-independent code, we cannot sibcall + optimize any indirect call, or a direct call to a global function, + as the PLT requires %ebx be live. (Darwin does not have a PLT.) */ + if (!TARGET_MACHO + && !TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl))) + return false; + /* APPLE LOCAL end indirect sibcall 4087330 */ + + if (decl) + func = decl; + else + { + func = TREE_TYPE (TREE_OPERAND (exp, 0)); + if (POINTER_TYPE_P (func)) + func = TREE_TYPE (func); + } + + /* Check that the return value locations are the same. Like + if we are returning floats on the 80387 register stack, we cannot + make a sibcall from a function that doesn't return a float to a + function that does or, conversely, from a function that does return + a float to a function that doesn't; the necessary stack adjustment + would not be executed. This is also the place we notice + differences in the return value ABI. Note that it is ok for one + of the functions to have void return type as long as the return + value of the other is passed in a register. 
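+
+     Concretely (an illustrative sketch):
+
+       double g (void);
+       void f (void) { g (); }
+
+     cannot sibcall g: `jmp g' would leave g's result on the x87
+     register stack with nobody left to pop it.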
*/ + a = ix86_function_value (TREE_TYPE (exp), func, false); + b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (STACK_REG_P (a) || STACK_REG_P (b)) + { + if (!rtx_equal_p (a, b)) + return false; + } + else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + ; + else if (!rtx_equal_p (a, b)) + return false; + + /* If this call is indirect, we'll need to be able to use a call-clobbered + register for the address of the target function. Make sure that all + such registers are not used for passing parameters. */ + if (!decl && !TARGET_64BIT) + { + tree type; + + /* We're looking at the CALL_EXPR, we need the type of the function. */ + type = TREE_OPERAND (exp, 0); /* pointer expression */ + type = TREE_TYPE (type); /* pointer type */ + type = TREE_TYPE (type); /* function type */ + + if (ix86_function_regparm (type, NULL) >= 3) + { + /* ??? Need to count the actual number of registers to be used, + not the possible number of registers. Fix later. */ + return false; + } + } + +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES + /* Dllimport'd functions are also called indirectly. */ + if (decl && DECL_DLLIMPORT_P (decl) + && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3) + return false; +#endif + + /* If we forced aligned the stack, then sibcalling would unalign the + stack, which may break the called function. */ + if (cfun->machine->force_align_arg_pointer) + return false; + + /* Otherwise okay. That also includes certain types of indirect calls. */ + return true; +} + +/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" + calling convention attributes; + arguments as in struct attribute_spec.handler. */ + +static tree +ix86_handle_cconv_attribute (tree *node, tree name, + tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qs attribute only applies to functions", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Can combine regparm with all attributes but fastcall. */ + if (is_attribute_p ("regparm", name)) + { + tree cst; + + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and regparm attributes are not compatible"); + } + + /* APPLE LOCAL begin regparmandstackparm */ + if (!TARGET_64BIT + && (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node)) + || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (*node)))) + { + error ("regparmandstackparm and regparm attributes are not compatible"); + } + /* APPLE LOCAL end regparmandstackparm */ + + cst = TREE_VALUE (args); + if (TREE_CODE (cst) != INTEGER_CST) + { + warning (OPT_Wattributes, + "%qs attribute requires an integer constant argument", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + else if (compare_tree_int (cst, REGPARM_MAX) > 0) + { + warning (OPT_Wattributes, "argument to %qs attribute larger than %d", + IDENTIFIER_POINTER (name), REGPARM_MAX); + *no_add_attrs = true; + } + + if (!TARGET_64BIT + && lookup_attribute (ix86_force_align_arg_pointer_string, + TYPE_ATTRIBUTES (*node)) + && compare_tree_int (cst, REGPARM_MAX-1)) + { + error ("%s functions limited to %d register parameters", + ix86_force_align_arg_pointer_string, REGPARM_MAX-1); + } + + return NULL_TREE; + } + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* Turn on popcnt instruction for -msse4.2 or -mabm. 
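+     With it enabled, e.g. (illustration only)
+
+       int bits (unsigned x) { return __builtin_popcount (x); }
+
+     can compile to a single popcnt instruction instead of a
+     table-driven or arithmetic bit-counting sequence.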
*/ + if (TARGET_SSE4_2) + x86_popcnt = true; + /* APPLE LOCAL end 5612787 mainline sse4 */ + + if (TARGET_64BIT) + { + warning (OPT_Wattributes, "%qs attribute ignored", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Can combine fastcall with stdcall (redundant) and sseregparm. */ + /* APPLE LOCAL begin regparmandstackparm */ + if (is_attribute_p ("fastcall", name) + || is_attribute_p ("regparmandstackparm", name)) + /* APPLE LOCAL end regparmandstackparm */ + { + if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and cdecl attributes are not compatible"); + } + if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and stdcall attributes are not compatible"); + } + if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and regparm attributes are not compatible"); + } + } + + /* Can combine stdcall with fastcall (redundant), regparm and + sseregparm. */ + else if (is_attribute_p ("stdcall", name)) + { + if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and cdecl attributes are not compatible"); + } + /* APPLE LOCAL begin regparmandstackparm */ + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)) + || lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node))) + /* APPLE LOCAL end regparmandstackparm */ + { + error ("stdcall and fastcall attributes are not compatible"); + } + } + + /* Can combine cdecl with regparm and sseregparm. */ + else if (is_attribute_p ("cdecl", name)) + { + if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and cdecl attributes are not compatible"); + } + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and cdecl attributes are not compatible"); + } + } + + /* Can combine sseregparm with all attributes. */ + + return NULL_TREE; +} + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). */ + +static int +ix86_comp_type_attributes (tree type1, tree type2) +{ + /* Check for mismatch of non-default calling convention. */ + const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; + + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Check for mismatched fastcall/regparm types. */ + if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2))) + || (ix86_function_regparm (type1, NULL) + != ix86_function_regparm (type2, NULL))) + return 0; + + /* Check for mismatched sseregparm types. */ + if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) + return 0; + + /* Check for mismatched return types (cdecl vs stdcall). */ + if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) + != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) + return 0; + + return 1; +} + +/* Return the regparm value for a function with the indicated TYPE and DECL. + DECL may be NULL when calling function indirectly + or considering a libcall. */ + +static int +ix86_function_regparm (tree type, tree decl) +{ + tree attr; + /* APPLE LOCAL begin MERGE FIXME audit to ensure that it's ok + + We had local_regparm but the FSF didn't and there didn't seem to + be a merge conflict some something is strange. These seem to be just + normal apple local changes. I asked Stuart about them in email. 
*/
+  int regparm = ix86_regparm;
+  bool user_convention = false;
+
+  if (!TARGET_64BIT)
+    {
+      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+      if (attr)
+	{
+	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+	  user_convention = true;
+	}
+
+      /* APPLE LOCAL begin regparmandstackparm */
+      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))
+	  || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
+      /* APPLE LOCAL end regparmandstackparm */
+	{
+	  regparm = 2;
+	  user_convention = true;
+	}
+
+      /* Use register calling convention for local functions when possible.  */
+      if (!TARGET_64BIT && !user_convention && decl
+	  && flag_unit_at_a_time && !profile_flag)
+	{
+	  struct cgraph_local_info *i = cgraph_local_info (decl);
+	  if (i && i->local)
+	    {
+	      int local_regparm, globals = 0, regno;
+
+	      /* Make sure no regparm register is taken by a global register
+		 variable.  */
+	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
+		if (global_regs[local_regparm])
+		  break;
+	      /* We can't use regparm(3) for nested functions as these use
+		 the static chain pointer in the third argument.  */
+	      if (local_regparm == 3
+		  /* APPLE LOCAL begin mainline */
+		  && (decl_function_context (decl)
+		      || ix86_force_align_arg_pointer)
+		  /* APPLE LOCAL end mainline */
+		  && !DECL_NO_STATIC_CHAIN (decl))
+		local_regparm = 2;
+	      /* If the function realigns its stack pointer, the
+		 prologue will clobber %ecx.  If we've already
+		 generated code for the callee, the callee
+		 DECL_STRUCT_FUNCTION is gone, so we fall back to
+		 scanning the attributes for the self-realigning
+		 property.  */
+	      if ((DECL_STRUCT_FUNCTION (decl)
+		   /* MERGE FIXME was in our version, but not in FSF 2006-05-23 */
+		   && DECL_STRUCT_FUNCTION (decl)->machine
+		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
+		  || (!DECL_STRUCT_FUNCTION (decl)
+		      && lookup_attribute (ix86_force_align_arg_pointer_string,
+					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
+		local_regparm = 2;
+	      /* Each global register variable increases register pressure,
+		 so the more global register variables there are, the less
+		 profitable the regparm optimization becomes, unless the
+		 user requested it explicitly.  */
+	      for (regno = 0; regno < 6; regno++)
+		if (global_regs[regno])
+		  globals++;
+	      local_regparm
+		= globals < local_regparm ? local_regparm - globals : 0;
+
+	      if (local_regparm > regparm)
+		regparm = local_regparm;
+	    }
+	}
+    }
+  /* APPLE LOCAL end MERGE FIXME audit to ensure that it's ok */
+  return regparm;
+}
+
+/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
+   DFmode (2) arguments in SSE registers for a function with the
+   indicated TYPE and DECL.  DECL may be NULL when calling function
+   indirectly or considering a libcall.  Otherwise return 0.  */
+
+static int
+ix86_function_sseregparm (tree type, tree decl)
+{
+  /* Use SSE registers to pass SFmode and DFmode arguments if requested
+     by the sseregparm attribute.
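+     E.g. (illustration only, ia32 with -msse2):
+
+       double __attribute__((sseregparm)) dot (double a, double b);
+
+     receives a and b in %xmm0 and %xmm1, and its return value likewise
+     comes back in %xmm0 rather than in st(0).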
*/ + if (TARGET_SSEREGPARM + || (type + && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) + { + if (!TARGET_SSE) + { + if (decl) + error ("Calling %qD with attribute sseregparm without " + "SSE/SSE2 enabled", decl); + else + error ("Calling %qT with attribute sseregparm without " + "SSE/SSE2 enabled", type); + return 0; + } + + return 2; + } + + /* APPLE LOCAL begin regparmandstackparm */ + if (type && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type))) + return 2; + /* APPLE LOCAL end regparmandstackparm */ + + /* For local functions, pass up to SSE_REGPARM_MAX SFmode + (and DFmode for SSE2) arguments in SSE registers, + even for 32-bit targets. */ + if (!TARGET_64BIT && decl + && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag) + { + struct cgraph_local_info *i = cgraph_local_info (decl); + if (i && i->local) + return TARGET_SSE2 ? 2 : 1; + } + + return 0; +} + +/* Return true if EAX is live at the start of the function. Used by + ix86_expand_prologue to determine if we need special help before + calling allocate_stack_worker. */ + +static bool +ix86_eax_live_at_start_p (void) +{ + /* Cheat. Don't bother working forward from ix86_function_regparm + to the function type to whether an actual argument is located in + eax. Instead just look at cfg info, which is still close enough + to correct at this point. This gives false positives for broken + functions that might use uninitialized data that happens to be + allocated in eax, but who cares? */ + return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0); +} + +/* Value is the number of bytes of arguments automatically + popped when returning from a subroutine call. + FUNDECL is the declaration node of the function (as a tree), + FUNTYPE is the data type of the function (as a tree), + or for a library call it is an identifier node for the subroutine name. + SIZE is the number of bytes of arguments passed on the stack. + + On the 80386, the RTD insn may be used to pop them if the number + of args is fixed, but if the number is variable then the caller + must pop them all. RTD can't be used for library calls now + because the library is compiled with the Unix compiler. + Use of RTD is a selectable option, since it is incompatible with + standard Unix calling sequences. If the option is not selected, + the caller must always pop the args. + + The attribute stdcall is equivalent to RTD on a per module basis. */ + +int +ix86_return_pops_args (tree fundecl, tree funtype, int size) +{ + int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); + + /* Cdecl functions override -mrtd, and never pop the stack. */ + if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { + + /* Stdcall and fastcall functions will pop the stack if not + variable args. */ + if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) + || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) + rtd = 1; + + if (rtd + && (TYPE_ARG_TYPES (funtype) == NULL_TREE + || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) + == void_type_node))) + return size; + } + + /* Lose any fake structure return argument if it is passed on the stack. */ + if (aggregate_value_p (TREE_TYPE (funtype), fundecl) + && !TARGET_64BIT + && !KEEP_AGGREGATE_RETURN_POINTER) + { + int nregs = ix86_function_regparm (funtype, fundecl); + + if (!nregs) + return GET_MODE_SIZE (Pmode); + } + + return 0; +} + +/* Argument support functions. */ + +/* Return true when register may be used to pass function parameters. 
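+
+   For the 64-bit ABI this means %rdi, %rsi, %rdx, %rcx, %r8 and %r9,
+   %xmm0-%xmm7, and %rax, whose low byte tells a varargs callee how
+   many SSE registers hold arguments.  E.g. (illustration only)
+
+     long f (long a, long b, long c, long d, long e, long g, long h);
+
+   receives its first six arguments in the six GPRs above and the
+   seventh on the stack.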
*/ +bool +ix86_function_arg_regno_p (int regno) +{ + int i; + if (!TARGET_64BIT) + { + if (TARGET_MACHO) + return (regno < REGPARM_MAX + || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); + else + return (regno < REGPARM_MAX + || (TARGET_MMX && MMX_REGNO_P (regno) + && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) + || (TARGET_SSE && SSE_REGNO_P (regno) + && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); + } + + if (TARGET_MACHO) + { + if (SSE_REGNO_P (regno) && TARGET_SSE) + return true; + } + else + { + if (TARGET_SSE && SSE_REGNO_P (regno) + && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) + return true; + } + /* RAX is used as hidden argument to va_arg functions. */ + if (!regno) + return true; + for (i = 0; i < REGPARM_MAX; i++) + if (regno == x86_64_int_parameter_registers[i]) + return true; + return false; +} + +/* Return if we do not know how to pass TYPE solely in registers. */ + +static bool +ix86_must_pass_in_stack (enum machine_mode mode, tree type) +{ + if (must_pass_in_stack_var_size_or_pad (mode, type)) + return true; + + /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! + The layout_type routine is crafty and tries to trick us into passing + currently unsupported vector types on the stack by using TImode. */ + return (!TARGET_64BIT && mode == TImode + && type && TREE_CODE (type) != VECTOR_TYPE); +} + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ + tree fntype, /* tree ptr for function decl */ + rtx libname, /* SYMBOL_REF of library name or 0 */ + tree fndecl) +{ + static CUMULATIVE_ARGS zero_cum; + tree param, next_param; + + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "\ninit_cumulative_args ("); + if (fntype) + fprintf (stderr, "fntype code = %s, ret code = %s", + tree_code_name[(int) TREE_CODE (fntype)], + tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); + else + fprintf (stderr, "no fntype"); + + if (libname) + fprintf (stderr, ", libname = %s", XSTR (libname, 0)); + } + + *cum = zero_cum; + + /* Set up the number of registers to use for passing arguments. */ + cum->nregs = ix86_regparm; + if (TARGET_SSE) + cum->sse_nregs = SSE_REGPARM_MAX; + if (TARGET_MMX) + cum->mmx_nregs = MMX_REGPARM_MAX; + cum->warn_sse = true; + cum->warn_mmx = true; + cum->maybe_vaarg = false; + + /* Use ecx and edx registers if function has fastcall attribute, + else look for regparm information. */ + if (fntype && !TARGET_64BIT) + { + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) + { + cum->nregs = 2; + cum->fastcall = 1; + } + else + cum->nregs = ix86_function_regparm (fntype, fndecl); + } + + /* Set up the number of SSE registers used for passing SFmode + and DFmode arguments. Warn for mismatching ABI. */ + cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl); + + /* Determine if this function has variable arguments. This is + indicated by the last argument being 'void_type_mode' if there + are no variable arguments. If there are variable arguments, then + we won't pass anything in registers in 32-bit mode. */ + + if (cum->nregs || cum->mmx_nregs || cum->sse_nregs) + { + for (param = (fntype) ? 
TYPE_ARG_TYPES (fntype) : 0; + param != 0; param = next_param) + { + next_param = TREE_CHAIN (param); + if (next_param == 0 && TREE_VALUE (param) != void_type_node) + { + if (!TARGET_64BIT) + { + cum->nregs = 0; + cum->sse_nregs = 0; + cum->mmx_nregs = 0; + cum->warn_sse = 0; + cum->warn_mmx = 0; + cum->fastcall = 0; + cum->float_in_sse = 0; + } + cum->maybe_vaarg = true; + } + } + } + if ((!fntype && !libname) + || (fntype && !TYPE_ARG_TYPES (fntype))) + cum->maybe_vaarg = true; + + if (TARGET_DEBUG_ARG) + fprintf (stderr, ", nregs=%d )\n", cum->nregs); + + return; +} + +/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. + But in the case of vector types, it is some vector mode. + + When we have only some of our vector isa extensions enabled, then there + are some modes for which vector_mode_supported_p is false. For these + modes, the generic vector support in gcc will choose some non-vector mode + in order to implement the type. By computing the natural mode, we'll + select the proper ABI location for the operand and not depend on whatever + the middle-end decides to do with these vector types. */ + +static enum machine_mode +type_natural_mode (tree type) +{ + enum machine_mode mode = TYPE_MODE (type); + + if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if ((size == 8 || size == 16) + /* ??? Generic code allows us to create width 1 vectors. Ignore. */ + && TYPE_VECTOR_SUBPARTS (type) > 1) + { + enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); + + if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) + mode = MIN_MODE_VECTOR_FLOAT; + else + mode = MIN_MODE_VECTOR_INT; + + /* Get the mode which has this inner mode and number of units. */ + for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) + if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) + && GET_MODE_INNER (mode) == innermode) + return mode; + + gcc_unreachable (); + } + } + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + /* Pass V1DImode objects as DImode. This is for compatibility. */ + if (TREE_CODE (type) == VECTOR_TYPE && mode == V1DImode && !TARGET_64BIT) + return DImode; + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + + return mode; +} + +/* We want to pass a value in REGNO whose "natural" mode is MODE. However, + this may not agree with the mode that the type system has chosen for the + register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can + go ahead and use it. Otherwise we have to build a PARALLEL instead. */ + +static rtx +gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, + unsigned int regno) +{ + rtx tmp; + + if (orig_mode != BLKmode) + tmp = gen_rtx_REG (orig_mode, regno); + else + { + tmp = gen_rtx_REG (mode, regno); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); + } + + return tmp; +} + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. 
*/ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. */ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. + CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + BIT_OFFSET is used internally for handling records and specifies offset + of the offset in bits modulo 256 to avoid overflow cases. + + See the x86-64 PS ABI for details. +*/ + +static int +classify_argument (enum machine_mode mode, tree type, + enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) +{ + HOST_WIDE_INT bytes = + (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + /* Variable sized entities are always passed/returned in memory. */ + if (bytes < 0) + return 0; + + if (mode != VOIDmode + && targetm.calls.must_pass_in_stack (mode, type)) + return 0; + + if (type && AGGREGATE_TYPE_P (type)) + { + int i; + tree field; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* On x86-64 we pass structures larger than 16 bytes on the stack. */ + if (bytes > 16) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Classify each field of record and merge classes. */ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + /* For classes first merge in the field of the subclasses. 
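+     (i.e. C++ base classes are classified first, then the record's
+     own fields are merged on top).  As an illustration of the whole
+     scheme, a 16-byte
+
+       struct s { double d; int i; int j; };
+
+     classifies its first eightbyte as SSE (the double) and its second
+     as INTEGER (the two ints), so it travels in one SSE register and
+     one GPR.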
*/ + if (TYPE_BINFO (type)) + { + tree binfo, base_binfo; + int basenum; + + for (binfo = TYPE_BINFO (type), basenum = 0; + BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++) + { + int num; + int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8; + tree type = BINFO_TYPE (base_binfo); + + num = classify_argument (TYPE_MODE (type), + type, subclasses, + (offset + bit_offset) % 256); + if (!num) + return 0; + for (i = 0; i < num; i++) + { + int pos = (offset + (bit_offset % 64)) / 8 / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + } + } + /* And now merge the fields of structure. */ + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + int num; + + if (TREE_TYPE (field) == error_mark_node) + continue; + + /* Bitfields are always classified as integer. Handle them + early, since later code would consider them to be + misaligned integers. */ + if (DECL_BIT_FIELD (field)) + { + for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; + i < ((int_bit_position (field) + (bit_offset % 64)) + + tree_low_cst (DECL_SIZE (field), 0) + + 63) / 8 / 8; i++) + classes[i] = + merge_classes (X86_64_INTEGER_CLASS, + classes[i]); + } + else + { + num = classify_argument (TYPE_MODE (TREE_TYPE (field)), + TREE_TYPE (field), subclasses, + (int_bit_position (field) + + bit_offset) % 256); + if (!num) + return 0; + for (i = 0; i < num; i++) + { + int pos = + (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + } + } + } + break; + + case ARRAY_TYPE: + /* Arrays are handled as small records. */ + { + int num; + num = classify_argument (TYPE_MODE (TREE_TYPE (type)), + TREE_TYPE (type), subclasses, bit_offset); + if (!num) + return 0; + + /* The partial classes are now full classes. */ + if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) + subclasses[0] = X86_64_SSE_CLASS; + if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) + subclasses[0] = X86_64_INTEGER_CLASS; + + for (i = 0; i < words; i++) + classes[i] = subclasses[i % num]; + + break; + } + case UNION_TYPE: + case QUAL_UNION_TYPE: + /* Unions are similar to RECORD_TYPE but offset is always 0. + */ + + /* Unions are not derived. */ + gcc_assert (!TYPE_BINFO (type) + || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type))); + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + int num; + + if (TREE_TYPE (field) == error_mark_node) + continue; + + num = classify_argument (TYPE_MODE (TREE_TYPE (field)), + TREE_TYPE (field), subclasses, + bit_offset); + if (!num) + return 0; + for (i = 0; i < num; i++) + classes[i] = merge_classes (subclasses[i], classes[i]); + } + } + break; + + default: + gcc_unreachable (); + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) + classes[i] = X86_64_SSE_CLASS; + + /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ + if (classes[i] == X86_64_X87UP_CLASS + && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) + classes[i] = X86_64_SSE_CLASS; + } + return words; + } + + /* Compute alignment needed. 
We align all types to natural boundaries with + exception of XFmode that is aligned to 64bits. */ + if (mode != VOIDmode && mode != BLKmode) + { + int mode_alignment = GET_MODE_BITSIZE (mode); + + if (mode == XFmode) + mode_alignment = 128; + else if (mode == XCmode) + mode_alignment = 256; + if (COMPLEX_MODE_P (mode)) + mode_alignment /= 2; + /* Misaligned fields are always returned in memory. */ + if (bit_offset % mode_alignment) + return 0; + } + + /* for V1xx modes, just use the base mode */ + if (VECTOR_MODE_P (mode) + && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) + mode = GET_MODE_INNER (mode); + + /* Classification of atomic types. */ + switch (mode) + { + case SDmode: + case DDmode: + classes[0] = X86_64_SSE_CLASS; + return 1; + case TDmode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + return 2; + case DImode: + case SImode: + case HImode: + case QImode: + case CSImode: + case CHImode: + case CQImode: + if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) + classes[0] = X86_64_INTEGERSI_CLASS; + else + classes[0] = X86_64_INTEGER_CLASS; + return 1; + case CDImode: + case TImode: + classes[0] = classes[1] = X86_64_INTEGER_CLASS; + return 2; + case CTImode: + return 0; + case SFmode: + if (!(bit_offset % 64)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case DFmode: + classes[0] = X86_64_SSEDF_CLASS; + return 1; + case XFmode: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; + case TFmode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + return 2; + case SCmode: + classes[0] = X86_64_SSE_CLASS; + return 1; + case DCmode: + classes[0] = X86_64_SSEDF_CLASS; + classes[1] = X86_64_SSEDF_CLASS; + return 2; + case XCmode: + classes[0] = X86_64_COMPLEX_X87_CLASS; + return 1; + case TCmode: + /* This modes is larger than 16 bytes. */ + return 0; + case V4SFmode: + case V4SImode: + case V16QImode: + case V8HImode: + case V2DFmode: + case V2DImode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + return 2; + case V2SFmode: + case V2SImode: + case V4HImode: + case V8QImode: + classes[0] = X86_64_SSE_CLASS; + return 1; + case BLKmode: + case VOIDmode: + return 0; + default: + gcc_assert (VECTOR_MODE_P (mode)); + + if (bytes > 16) + return 0; + + gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); + + if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) + classes[0] = X86_64_INTEGERSI_CLASS; + else + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGER_CLASS; + return 1 + (bytes > 8); + } +} + +/* Examine the argument and return set number of register required in each + class. Return 0 iff parameter should be passed in memory. */ +static int +examine_argument (enum machine_mode mode, tree type, int in_return, + int *int_nregs, int *sse_nregs) +{ + enum x86_64_reg_class class[MAX_CLASSES]; + int n = classify_argument (mode, type, class, 0); + + *int_nregs = 0; + *sse_nregs = 0; + if (!n) + return 0; + for (n--; n >= 0; n--) + switch (class[n]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + (*int_nregs)++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + (*sse_nregs)++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + if (!in_return) + return 0; + break; + case X86_64_COMPLEX_X87_CLASS: + return in_return ? 
2 : 0; + case X86_64_MEMORY_CLASS: + gcc_unreachable (); + } + return 1; +} + +/* Construct container for the argument used by GCC interface. See + FUNCTION_ARG for the detailed description. */ + +static rtx +construct_container (enum machine_mode mode, enum machine_mode orig_mode, + tree type, int in_return, int nintregs, int nsseregs, + const int *intreg, int sse_regno) +{ + /* The following variables hold the static issued_error state. */ + static bool issued_sse_arg_error; + static bool issued_sse_ret_error; + static bool issued_x87_ret_error; + + enum machine_mode tmpmode; + int bytes = + (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + enum x86_64_reg_class class[MAX_CLASSES]; + int n; + int i; + int nexps = 0; + int needed_sseregs, needed_intregs; + rtx exp[MAX_CLASSES]; + rtx ret; + + n = classify_argument (mode, type, class, 0); + if (TARGET_DEBUG_ARG) + { + if (!n) + fprintf (stderr, "Memory class\n"); + else + { + fprintf (stderr, "Classes:"); + for (i = 0; i < n; i++) + { + fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]); + } + fprintf (stderr, "\n"); + } + } + if (!n) + return NULL; + if (!examine_argument (mode, type, in_return, &needed_intregs, + &needed_sseregs)) + return NULL; + if (needed_intregs > nintregs || needed_sseregs > nsseregs) + return NULL; + + /* We allowed the user to turn off SSE for kernel mode. Don't crash if + some less clueful developer tries to use floating-point anyway. */ + if (needed_sseregs && !TARGET_SSE) + { + if (in_return) + { + if (!issued_sse_ret_error) + { + error ("SSE register return with SSE disabled"); + issued_sse_ret_error = true; + } + } + else if (!issued_sse_arg_error) + { + error ("SSE register argument with SSE disabled"); + issued_sse_arg_error = true; + } + return NULL; + } + + /* Likewise, error if the ABI requires us to return values in the + x87 registers and the user specified -mno-80387. */ + if (!TARGET_80387 && in_return) + for (i = 0; i < n; i++) + if (class[i] == X86_64_X87_CLASS + || class[i] == X86_64_X87UP_CLASS + || class[i] == X86_64_COMPLEX_X87_CLASS) + { + if (!issued_x87_ret_error) + { + error ("x87 register return with x87 disabled"); + issued_x87_ret_error = true; + } + return NULL; + } + + /* First construct simple cases. Avoid SCmode, since we want to use + single register to pass this type. */ + if (n == 1 && mode != SCmode) + switch (class[0]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + return gen_rtx_REG (mode, intreg[0]); + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno)); + case X86_64_X87_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return gen_rtx_REG (mode, FIRST_STACK_REG); + case X86_64_NO_CLASS: + /* Zero sized array, struct or class. */ + return NULL; + default: + gcc_unreachable (); + } + if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS + && mode != BLKmode) + return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); + if (n == 2 + && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS) + return gen_rtx_REG (XFmode, FIRST_STACK_REG); + if (n == 2 && class[0] == X86_64_INTEGER_CLASS + && class[1] == X86_64_INTEGER_CLASS + && (mode == CDImode || mode == TImode || mode == TFmode) + && intreg[0] + 1 == intreg[1]) + return gen_rtx_REG (mode, intreg[0]); + + /* Otherwise figure out the entries of the PARALLEL. 
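+     E.g. (an illustrative sketch) a struct { double d; long l; }
+     argument comes out as
+
+       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
+                  (expr_list (reg:DI di) (const_int 8))])
+
+     i.e. eightbyte 0 in an SSE register and eightbyte 1 in a GPR,
+     each tagged with its byte offset into the object.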
*/ + for (i = 0; i < n; i++) + { + switch (class[i]) + { + case X86_64_NO_CLASS: + break; + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + /* Merge TImodes on aligned occasions here too. */ + if (i * 8 + 8 > bytes) + tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); + else if (class[i] == X86_64_INTEGERSI_CLASS) + tmpmode = SImode; + else + tmpmode = DImode; + /* We've requested 24 bytes we don't have mode for. Use DImode. */ + if (tmpmode == BLKmode) + tmpmode = DImode; + exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (tmpmode, *intreg), + GEN_INT (i*8)); + intreg++; + break; + case X86_64_SSESF_CLASS: + exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (SFmode, + SSE_REGNO (sse_regno)), + GEN_INT (i*8)); + sse_regno++; + break; + case X86_64_SSEDF_CLASS: + exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DFmode, + SSE_REGNO (sse_regno)), + GEN_INT (i*8)); + sse_regno++; + break; + case X86_64_SSE_CLASS: + if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS) + tmpmode = TImode; + else + tmpmode = DImode; + exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (tmpmode, + SSE_REGNO (sse_regno)), + GEN_INT (i*8)); + if (tmpmode == TImode) + i++; + sse_regno++; + break; + default: + gcc_unreachable (); + } + } + + /* Empty aligned struct, union or class. */ + if (nexps == 0) + return NULL; + + ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); + for (i = 0; i < nexps; i++) + XVECEXP (ret, 0, i) = exp [i]; + return ret; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +void +function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type, int named) +{ + int bytes = + (mode == BLKmode) ? 
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (type) + mode = type_natural_mode (type); + + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, " + "mode=%s, named=%d)\n\n", + words, cum->words, cum->nregs, cum->sse_nregs, + GET_MODE_NAME (mode), named); + + if (TARGET_64BIT) + { + int int_nregs, sse_nregs; + if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) + cum->words += words; + else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) + { + cum->nregs -= int_nregs; + cum->sse_nregs -= sse_nregs; + cum->regno += int_nregs; + cum->sse_regno += sse_nregs; + } + else + cum->words += words; + } + else + { + switch (mode) + { + default: + break; + + case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ + + case DImode: + case SImode: + case HImode: + case QImode: + cum->words += words; + cum->nregs -= words; + cum->regno += words; + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->regno = 0; + } + break; + + case DFmode: + if (cum->float_in_sse < 2) + break; + case SFmode: + if (cum->float_in_sse < 1) + break; + /* FALLTHRU */ + + case TImode: + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V4SFmode: + case V2DFmode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + cum->sse_words += words; + cum->sse_nregs -= 1; + cum->sse_regno += 1; + if (cum->sse_nregs <= 0) + { + cum->sse_nregs = 0; + cum->sse_regno = 0; + } + } + break; + + case V8QImode: + case V4HImode: + case V2SImode: + case V2SFmode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + cum->mmx_words += words; + cum->mmx_nregs -= 1; + cum->mmx_regno += 1; + if (cum->mmx_nregs <= 0) + { + cum->mmx_nregs = 0; + cum->mmx_regno = 0; + } + } + break; + } + } +} + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ + +rtx +function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode, + tree type, int named) +{ + enum machine_mode mode = orig_mode; + rtx ret = NULL_RTX; + int bytes = + (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + static bool warnedsse, warnedmmx; + + /* To simplify the code below, represent vector types with a vector mode + even if MMX/SSE are not active. */ + if (type && TREE_CODE (type) == VECTOR_TYPE) + mode = type_natural_mode (type); + + /* Handle a hidden AL argument containing number of registers for varargs + x86-64 functions. For i386 ABI just return constm1_rtx to avoid + any AL settings. */ + if (mode == VOIDmode) + { + if (TARGET_64BIT) + return GEN_INT (cum->maybe_vaarg + ? (cum->sse_nregs < 0 + ? SSE_REGPARM_MAX + : cum->sse_regno) + : -1); + else + return constm1_rtx; + } + if (TARGET_64BIT) + ret = construct_container (mode, orig_mode, type, 0, cum->nregs, + cum->sse_nregs, + &x86_64_int_parameter_registers [cum->regno], + cum->sse_regno); + else + switch (mode) + { + /* For now, pass fp/complex values on the stack. 
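+     For the integer cases below, illustratively: with regparm(2),
+
+       int f (int a, int b, int c);
+
+     gets a in %eax and b in %edx while c stays on the stack, and fp
+     or complex arguments simply fall through to the stack default.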
*/ + default: + break; + + case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ + case DImode: + case SImode: + case HImode: + case QImode: + if (words <= cum->nregs) + { + int regno = cum->regno; + + /* Fastcall allocates the first two DWORD (SImode) or + smaller arguments to ECX and EDX. */ + if (cum->fastcall) + { + if (mode == BLKmode || mode == DImode) + break; + + /* ECX not EAX is the first allocated register. */ + if (regno == 0) + regno = 2; + } + ret = gen_rtx_REG (mode, regno); + } + break; + case DFmode: + if (cum->float_in_sse < 2) + break; + case SFmode: + if (cum->float_in_sse < 1) + break; + /* FALLTHRU */ + case TImode: + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V4SFmode: + case V2DFmode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + if (!TARGET_SSE && !warnedsse && cum->warn_sse) + { + warnedsse = true; + warning (0, "SSE vector argument without SSE enabled " + "changes the ABI"); + } + if (cum->sse_nregs) + ret = gen_reg_or_parallel (mode, orig_mode, + cum->sse_regno + FIRST_SSE_REG); + } + break; + case V8QImode: + case V4HImode: + case V2SImode: + case V2SFmode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) + { + warnedmmx = true; + warning (0, "MMX vector argument without MMX enabled " + "changes the ABI"); + } + if (cum->mmx_nregs) + ret = gen_reg_or_parallel (mode, orig_mode, + cum->mmx_regno + FIRST_MMX_REG); + } + break; + } + + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, + "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ", + words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); + + if (ret) + print_simple_rtl (stderr, ret); + else + fprintf (stderr, ", stack"); + + fprintf (stderr, " )\n"); + } + + return ret; +} + +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +static bool +ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type, bool named ATTRIBUTE_UNUSED) +{ + if (!TARGET_64BIT) + return 0; + + if (type && int_size_in_bytes (type) == -1) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference\n"); + return 1; + } + + return 0; +} + +/* Return true when TYPE should be 128bit aligned for 32bit argument passing + ABI. Only called if TARGET_SSE. */ +static bool +contains_128bit_aligned_vector_p (tree type) +{ + enum machine_mode mode = TYPE_MODE (type); + if (SSE_REG_MODE_P (mode) + && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) + return true; + if (TYPE_ALIGN (type) < 128) + return false; + + if (AGGREGATE_TYPE_P (type)) + { + /* Walk the aggregates recursively. */ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + tree field; + + if (TYPE_BINFO (type)) + { + tree binfo, base_binfo; + int i; + + for (binfo = TYPE_BINFO (type), i = 0; + BINFO_BASE_ITERATE (binfo, i, base_binfo); i++) + if (contains_128bit_aligned_vector_p + (BINFO_TYPE (base_binfo))) + return true; + } + /* And now merge the fields of structure. 
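+     (For illustration: struct t { __m128 v; int i; } contains a
+     128-bit aligned vector, so ix86_function_arg_boundary below gives
+     it 128-bit argument alignment, while a struct of plain ints keeps
+     the 32-bit PARM_BOUNDARY.)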
*/
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
+ return true;
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ /* Just for use if some languages pass arrays by value. */
+ if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
+ return true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return false;
+}
+
+/* Gives the alignment boundary, in bits, of an argument with the
+ specified mode and type. */
+
+int
+ix86_function_arg_boundary (enum machine_mode mode, tree type)
+{
+ int align;
+ if (type)
+ align = TYPE_ALIGN (type);
+ else
+ align = GET_MODE_ALIGNMENT (mode);
+ /* APPLE LOCAL begin unbreak ppc64 abi 5103220 */
+ if (type && integer_zerop (TYPE_SIZE (type)))
+ align = PARM_BOUNDARY;
+ /* APPLE LOCAL end unbreak ppc64 abi 5103220 */
+ if (align < PARM_BOUNDARY)
+ align = PARM_BOUNDARY;
+ if (!TARGET_64BIT)
+ {
+ /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
+ make an exception for SSE modes since these require 128-bit
+ alignment.
+
+ The handling here differs from field_alignment. ICC aligns MMX
+ arguments to 4-byte boundaries, while structure fields are aligned
+ to 8-byte boundaries. */
+ if (!TARGET_SSE)
+ align = PARM_BOUNDARY;
+ else if (!type)
+ {
+ if (!SSE_REG_MODE_P (mode))
+ align = PARM_BOUNDARY;
+ }
+ else
+ {
+ if (!contains_128bit_aligned_vector_p (type))
+ align = PARM_BOUNDARY;
+ }
+ }
+ if (align > 128)
+ align = 128;
+ return align;
+}
+
+/* Return true if N is a possible register number of a function value. */
+bool
+ix86_function_value_regno_p (int regno)
+{
+ if (TARGET_MACHO)
+ {
+ if (!TARGET_64BIT)
+ {
+ return ((regno) == 0
+ || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
+ || ((regno) == FIRST_SSE_REG && TARGET_SSE));
+ }
+ return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
+ || ((regno) == FIRST_SSE_REG && TARGET_SSE)
+ || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
+ }
+ else
+ {
+ if (regno == 0
+ || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
+ || (regno == FIRST_SSE_REG && TARGET_SSE))
+ return true;
+
+ if (!TARGET_64BIT
+ && (regno == FIRST_MMX_REG && TARGET_MMX))
+ return true;
+
+ return false;
+ }
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+rtx
+ix86_function_value (tree valtype, tree fntype_or_decl,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode natmode = type_natural_mode (valtype);
+
+ if (TARGET_64BIT)
+ {
+ rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
+ 1, REGPARM_MAX, SSE_REGPARM_MAX,
+ x86_64_int_return_registers, 0);
+ /* For zero-sized structures, construct_container returns NULL, but we
+ need to keep the rest of the compiler happy by returning a
+ meaningful value. */
+ if (!ret)
+ ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
+ return ret;
+ }
+ else
+ {
+ tree fn = NULL_TREE, fntype;
+ if (fntype_or_decl
+ && DECL_P (fntype_or_decl))
+ fn = fntype_or_decl;
+ fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
+ return gen_rtx_REG (TYPE_MODE (valtype),
+ ix86_value_regno (natmode, fn, fntype));
+ }
+}
+
+/* APPLE LOCAL begin radar 4781080 */
+/* Return true iff we must generate a call to objcMsgSend for an
+ fp-returning method.
*/
+bool
+ix86_objc_fpreturn_msgcall (tree ret_type, bool no_long_double)
+{
+ if (no_long_double)
+ return TARGET_64BIT && SCALAR_FLOAT_TYPE_P (ret_type)
+ && TYPE_MODE (ret_type) != XFmode;
+ else
+ return SCALAR_FLOAT_TYPE_P (ret_type);
+}
+/* APPLE LOCAL end radar 4781080 */
+
+/* Return true iff type is returned in memory. */
+int
+ix86_return_in_memory (tree type)
+{
+ int needed_intregs, needed_sseregs, size;
+ enum machine_mode mode = type_natural_mode (type);
+
+ if (TARGET_64BIT)
+ return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+
+ if (mode == BLKmode)
+ return 1;
+
+ size = int_size_in_bytes (type);
+
+ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
+ return 0;
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ /* User-created vectors small enough to fit in EAX. */
+ if (size < 8)
+ return 0;
+
+ /* MMX/3dNow values are returned in MM0,
+ except when it doesn't exist. */
+ if (size == 8)
+ /* APPLE LOCAL begin radar 4875125. */
+ /* Undo the mainline patch which broke MACHO ABI compatibility. */
+ return (TARGET_MACHO) ? 1 : (TARGET_MMX ? 0 : 1);
+ /* APPLE LOCAL end radar 4875125. */
+
+ /* SSE values are returned in XMM0, except when it doesn't exist. */
+ if (size == 16)
+ return (TARGET_SSE ? 0 : 1);
+ }
+
+ if (mode == XFmode)
+ return 0;
+
+ if (mode == TDmode)
+ return 1;
+
+ if (size > 12)
+ return 1;
+ return 0;
+}
+
+/* When returning SSE vector types, we have a choice of either
+ (1) being ABI incompatible with a -march switch, or
+ (2) generating an error.
+ Given no good solution, I think the safest thing is one warning.
+ The user won't be able to use -Werror, but....
+
+ Choose the STRUCT_VALUE_RTX hook because that's (at present) only
+ called in response to actually generating a caller or callee that
+ uses such a type. As opposed to RETURN_IN_MEMORY, which is called
+ via aggregate_value_p for general type probing from tree-ssa. */
+
+static rtx
+ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
+{
+ static bool warnedsse, warnedmmx;
+
+ if (type)
+ {
+ /* Look at the return type of the function, not the function type. */
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
+
+ if (!TARGET_SSE && !warnedsse)
+ {
+ if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ {
+ warnedsse = true;
+ warning (0, "SSE vector return without SSE enabled "
+ "changes the ABI");
+ }
+ }
+
+ if (!TARGET_MMX && !warnedmmx)
+ {
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector return without MMX enabled "
+ "changes the ABI");
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+rtx
+ix86_libcall_value (enum machine_mode mode)
+{
+ if (TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case SFmode:
+ case SCmode:
+ case DFmode:
+ case DCmode:
+ case TFmode:
+ case SDmode:
+ case DDmode:
+ case TDmode:
+ return gen_rtx_REG (mode, FIRST_SSE_REG);
+ case XFmode:
+ case XCmode:
+ return gen_rtx_REG (mode, FIRST_FLOAT_REG);
+ case TCmode:
+ return NULL;
+ default:
+ return gen_rtx_REG (mode, 0);
+ }
+ }
+ else
+ return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
+}
+
+/* Given a mode, return the register to use for a return value. */
+
+static int
+ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* 8-byte vector modes in %mm0.
See ix86_return_in_memory for where + we normally prevent this case when mmx is not available. However + some ABIs may require the result to be returned like DImode. */ + if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) + return TARGET_MMX ? FIRST_MMX_REG : 0; + + /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where + we prevent this case when sse is not available. However some ABIs + may require the result to be returned like integer TImode. */ + if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) + return TARGET_SSE ? FIRST_SSE_REG : 0; + + /* Decimal floating point values can go in %eax, unlike other float modes. */ + if (DECIMAL_FLOAT_MODE_P (mode)) + return 0; + + /* APPLE LOCAL begin regparmandstackparm */ + if (SSE_FLOAT_MODE_P(mode) + && fntype && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (fntype))) + return FIRST_SSE_REG; + /* APPLE LOCAL end regparmandstackparm */ + + /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */ + if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387) + return 0; + + /* Floating point return values in %st(0), except for local functions when + SSE math is enabled or for functions with sseregparm attribute. */ + if ((func || fntype) + && (mode == SFmode || mode == DFmode)) + { + int sse_level = ix86_function_sseregparm (fntype, func); + if ((sse_level >= 1 && mode == SFmode) + || (sse_level == 2 && mode == DFmode)) + return FIRST_SSE_REG; + } + + return FIRST_FLOAT_REG; +} + +/* Create the va_list data type. */ + +static tree +ix86_build_builtin_va_list (void) +{ + tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; + + /* For i386 we use plain pointer to argument area. */ + if (!TARGET_64BIT) + return build_pointer_type (char_type_node); + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); + + f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), + unsigned_type_node); + f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), + unsigned_type_node); + f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), + ptr_type_node); + f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), + ptr_type_node); + + va_list_gpr_counter_field = f_gpr; + va_list_fpr_counter_field = f_fpr; + + DECL_FIELD_CONTEXT (f_gpr) = record; + DECL_FIELD_CONTEXT (f_fpr) = record; + DECL_FIELD_CONTEXT (f_ovf) = record; + DECL_FIELD_CONTEXT (f_sav) = record; + + TREE_CHAIN (record) = type_decl; + TYPE_NAME (record) = type_decl; + TYPE_FIELDS (record) = f_gpr; + TREE_CHAIN (f_gpr) = f_fpr; + TREE_CHAIN (f_fpr) = f_ovf; + TREE_CHAIN (f_ovf) = f_sav; + + layout_type (record); + + /* The correct type is an array type of one element. */ + return build_array_type (record, build_index_type (size_zero_node)); +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ + +static void +ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type, int *pretend_size ATTRIBUTE_UNUSED, + int no_rtl) +{ + CUMULATIVE_ARGS next_cum; + rtx save_area = NULL_RTX, mem; + rtx label; + rtx label_ref; + rtx tmp_reg; + rtx nsse_reg; + int set; + tree fntype; + int stdarg_p; + int i; + + if (!TARGET_64BIT) + return; + + if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size) + return; + + /* Indicate to allocate space on the stack for varargs save area. 
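+
+     For reference, the area saved below follows the x86-64 psABI layout:
+     the six integer argument registers occupy the first 8*REGPARM_MAX =
+     48 bytes, followed by SSE_REGPARM_MAX 16-byte slots for the SSE
+     argument registers, 176 bytes in all (a sketch based on the psABI;
+     the code below relies only on the offsets it computes itself).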
*/
+ ix86_save_varrargs_registers = 1;
+
+ cfun->stack_alignment_needed = 128;
+
+ fntype = TREE_TYPE (current_function_decl);
+ stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+ != void_type_node));
+
+ /* For varargs, we do not want to skip the dummy va_dcl argument.
+ For stdargs, we do want to skip the last named argument. */
+ next_cum = *cum;
+ if (stdarg_p)
+ function_arg_advance (&next_cum, mode, type, 1);
+
+ if (!no_rtl)
+ save_area = frame_pointer_rtx;
+
+ set = get_varargs_alias_set ();
+
+ for (i = next_cum.regno;
+ i < ix86_regparm
+ && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ i++)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (save_area, i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (Pmode,
+ x86_64_int_parameter_registers[i]));
+ }
+
+ if (next_cum.sse_nregs && cfun->va_list_fpr_size)
+ {
+ /* Now emit code to save the SSE registers. The AX parameter contains
+ the number of SSE parameter registers used to call this function.
+ We use the sse_prologue_save insn template, which produces a computed
+ jump across the SSE saves. We need some preparation work to get this
+ working. */
+
+ label = gen_label_rtx ();
+ label_ref = gen_rtx_LABEL_REF (Pmode, label);
+
+ /* Compute address to jump to :
+ label - 5*eax + nnamed_sse_arguments*5 */
+ tmp_reg = gen_reg_rtx (Pmode);
+ nsse_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_MULT (Pmode, nsse_reg,
+ GEN_INT (4))));
+ if (next_cum.sse_regno)
+ emit_move_insn
+ (nsse_reg,
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ label_ref,
+ GEN_INT (next_cum.sse_regno * 4))));
+ else
+ emit_move_insn (nsse_reg, label_ref);
+ emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
+
+ /* Compute the address of the memory block we save into. We always use
+ a pointer pointing 127 bytes after the first byte to store; this is
+ needed to keep the instruction size limited to 4 bytes. */
+ tmp_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ plus_constant (save_area,
+ 8 * REGPARM_MAX + 127)));
+ mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, BITS_PER_WORD);
+
+ /* And finally do the dirty job! */
+ emit_insn (gen_sse_prologue_save (mem, nsse_reg,
+ GEN_INT (next_cum.sse_regno), label));
+ }
+
+}
+
+/* Implement va_start. */
+
+void
+ix86_va_start (tree valist, rtx nextarg)
+{
+ HOST_WIDE_INT words, n_gpr, n_fpr;
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ tree type;
+
+ /* Only the 64-bit target needs something special. */
+ if (!TARGET_64BIT)
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ /* Count the number of gp and fp argument registers used.
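+
+     As a worked example (assuming the usual x86-64 REGPARM_MAX of 6):
+     for a function such as int f (int a, double b, ...), one GPR and one
+     SSE register are consumed by the named arguments, so gp_offset is
+     initialized below to 1*8 = 8 and fp_offset to 1*16 + 8*REGPARM_MAX
+     = 64.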
*/
+ words = current_function_args_info.words;
+ n_gpr = current_function_args_info.regno;
+ n_fpr = current_function_args_info.sse_regno;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
+ (int) words, (int) n_gpr, (int) n_fpr);
+
+ if (cfun->va_list_gpr_size)
+ {
+ type = TREE_TYPE (gpr);
+ t = build2 (MODIFY_EXPR, type, gpr,
+ build_int_cst (type, n_gpr * 8));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (cfun->va_list_fpr_size)
+ {
+ type = TREE_TYPE (fpr);
+ t = build2 (MODIFY_EXPR, type, fpr,
+ build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the overflow area. */
+ type = TREE_TYPE (ovf);
+ t = make_tree (type, virtual_incoming_args_rtx);
+ if (words != 0)
+ t = build2 (PLUS_EXPR, type, t,
+ build_int_cst (type, words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, type, ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ {
+ /* Find the register save area.
+ The prologue of the function saves it right above the stack frame. */
+ type = TREE_TYPE (sav);
+ t = make_tree (type, frame_pointer_rtx);
+ t = build2 (MODIFY_EXPR, type, sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
+
+/* Implement va_arg. */
+
+tree
+ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
+{
+ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ int size, rsize;
+ tree lab_false, lab_over = NULL_TREE;
+ tree addr, t2;
+ rtx container;
+ int indirect_p = 0;
+ tree ptrtype;
+ enum machine_mode nat_mode;
+
+ /* Only the 64-bit target needs something special. */
+ if (!TARGET_64BIT)
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build_va_arg_indirect_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect_p)
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ nat_mode = type_natural_mode (type);
+ container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
+ REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
+
+ /* Pull the value out of the saved registers. */
+
+ addr = create_tmp_var (ptr_type_node, "addr");
+ DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
+
+ if (container)
+ {
+ int needed_intregs, needed_sseregs;
+ bool need_temp;
+ tree int_addr, sse_addr;
+
+ lab_false = create_artificial_label ();
+ lab_over = create_artificial_label ();
+
+ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
+
+ need_temp = (!REG_P (container)
+ && ((needed_intregs && TYPE_ALIGN (type) > 64)
+ || TYPE_ALIGN (type) > 128));
+
+ /* If we are passing a structure, verify that it occupies a consecutive
+ block of the register save area. If not, we need to do moves.
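+
+     For example, a struct { long a; double b; } is classified as one
+     INTEGER and one SSE word under the usual psABI rules (an
+     illustration), so its two pieces live in different blocks of the
+     register save area; the checks below then see non-consecutive slots
+     and the value is copied piecewise through a temporary.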
*/ + if (!need_temp && !REG_P (container)) + { + /* Verify that all registers are strictly consecutive */ + if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) + { + int i; + + for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) + { + rtx slot = XVECEXP (container, 0, i); + if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i + || INTVAL (XEXP (slot, 1)) != i * 16) + need_temp = 1; + } + } + else + { + int i; + + for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) + { + rtx slot = XVECEXP (container, 0, i); + if (REGNO (XEXP (slot, 0)) != (unsigned int) i + || INTVAL (XEXP (slot, 1)) != i * 8) + need_temp = 1; + } + } + } + if (!need_temp) + { + int_addr = addr; + sse_addr = addr; + } + else + { + int_addr = create_tmp_var (ptr_type_node, "int_addr"); + DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); + sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); + DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); + } + + /* First ensure that we fit completely in registers. */ + if (needed_intregs) + { + t = build_int_cst (TREE_TYPE (gpr), + (REGPARM_MAX - needed_intregs + 1) * 8); + t = build2 (GE_EXPR, boolean_type_node, gpr, t); + t2 = build1 (GOTO_EXPR, void_type_node, lab_false); + t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); + gimplify_and_add (t, pre_p); + } + if (needed_sseregs) + { + t = build_int_cst (TREE_TYPE (fpr), + (SSE_REGPARM_MAX - needed_sseregs + 1) * 16 + + REGPARM_MAX * 8); + t = build2 (GE_EXPR, boolean_type_node, fpr, t); + t2 = build1 (GOTO_EXPR, void_type_node, lab_false); + t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); + gimplify_and_add (t, pre_p); + } + + /* Compute index to start of area used for integer regs. */ + if (needed_intregs) + { + /* int_addr = gpr + sav; */ + t = fold_convert (ptr_type_node, gpr); + t = build2 (PLUS_EXPR, ptr_type_node, sav, t); + t = build2 (MODIFY_EXPR, void_type_node, int_addr, t); + gimplify_and_add (t, pre_p); + } + if (needed_sseregs) + { + /* sse_addr = fpr + sav; */ + t = fold_convert (ptr_type_node, fpr); + t = build2 (PLUS_EXPR, ptr_type_node, sav, t); + t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t); + gimplify_and_add (t, pre_p); + } + if (need_temp) + { + int i; + tree temp = create_tmp_var (type, "va_arg_tmp"); + + /* addr = &temp; */ + t = build1 (ADDR_EXPR, build_pointer_type (type), temp); + t = build2 (MODIFY_EXPR, void_type_node, addr, t); + gimplify_and_add (t, pre_p); + + for (i = 0; i < XVECLEN (container, 0); i++) + { + rtx slot = XVECEXP (container, 0, i); + rtx reg = XEXP (slot, 0); + enum machine_mode mode = GET_MODE (reg); + tree piece_type = lang_hooks.types.type_for_mode (mode, 1); + tree addr_type = build_pointer_type (piece_type); + tree src_addr, src; + int src_offset; + tree dest_addr, dest; + + if (SSE_REGNO_P (REGNO (reg))) + { + src_addr = sse_addr; + src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; + } + else + { + src_addr = int_addr; + src_offset = REGNO (reg) * 8; + } + src_addr = fold_convert (addr_type, src_addr); + src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr, + size_int (src_offset))); + src = build_va_arg_indirect_ref (src_addr); + + dest_addr = fold_convert (addr_type, addr); + dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr, + size_int (INTVAL (XEXP (slot, 1))))); + dest = build_va_arg_indirect_ref (dest_addr); + + t = build2 (MODIFY_EXPR, void_type_node, dest, src); + gimplify_and_add (t, pre_p); + } + } + + if (needed_intregs) + { + t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, + 
build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); + t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t); + gimplify_and_add (t, pre_p); + } + if (needed_sseregs) + { + t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, + build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); + t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t); + gimplify_and_add (t, pre_p); + } + + t = build1 (GOTO_EXPR, void_type_node, lab_over); + gimplify_and_add (t, pre_p); + + t = build1 (LABEL_EXPR, void_type_node, lab_false); + append_to_statement_list (t, pre_p); + } + + /* ... otherwise out of the overflow area. */ + + /* Care for on-stack alignment if needed. */ + if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64 + || integer_zerop (TYPE_SIZE (type))) + t = ovf; + else + { + HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; + t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf, + build_int_cst (TREE_TYPE (ovf), align - 1)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -align)); + } + gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); + + t2 = build2 (MODIFY_EXPR, void_type_node, addr, t); + gimplify_and_add (t2, pre_p); + + t = build2 (PLUS_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD)); + t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); + gimplify_and_add (t, pre_p); + + if (container) + { + t = build1 (LABEL_EXPR, void_type_node, lab_over); + append_to_statement_list (t, pre_p); + } + + ptrtype = build_pointer_type (type); + addr = fold_convert (ptrtype, addr); + + if (indirect_p) + addr = build_va_arg_indirect_ref (addr); + return build_va_arg_indirect_ref (addr); +} + +/* Return nonzero if OPNUM's MEM should be matched + in movabs* patterns. */ + +int +ix86_check_movabs (rtx insn, int opnum) +{ + rtx set, mem; + + set = PATTERN (insn); + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + gcc_assert (GET_CODE (set) == SET); + mem = XEXP (set, opnum); + while (GET_CODE (mem) == SUBREG) + mem = SUBREG_REG (mem); + gcc_assert (GET_CODE (mem) == MEM); + return (volatile_ok || !MEM_VOLATILE_P (mem)); +} + +/* Initialize the table of extra 80387 mathematical constants. */ + +static void +init_ext_80387_constants (void) +{ + static const char * cst[5] = + { + "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ + "0.6931471805599453094286904741849753009", /* 1: fldln2 */ + "1.4426950408889634073876517827983434472", /* 2: fldl2e */ + "3.3219280948873623478083405569094566090", /* 3: fldl2t */ + "3.1415926535897932385128089594061862044", /* 4: fldpi */ + }; + int i; + + for (i = 0; i < 5; i++) + { + real_from_string (&ext_80387_constants_table[i], cst[i]); + /* Ensure each constant is rounded to XFmode precision. */ + real_convert (&ext_80387_constants_table[i], + XFmode, &ext_80387_constants_table[i]); + } + + ext_80387_constants_init = 1; +} + +/* Return true if the constant is something that can be loaded with + a special instruction. */ + +int +standard_80387_constant_p (rtx x) +{ + if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) + return -1; + + if (x == CONST0_RTX (GET_MODE (x))) + return 1; + if (x == CONST1_RTX (GET_MODE (x))) + return 2; + + /* For XFmode constants, try to find a special 80387 instruction when + optimizing for size or on those CPUs that benefit from them. */ + if (GET_MODE (x) == XFmode + && (optimize_size || x86_ext_80387_constants & TUNEMASK)) + { + REAL_VALUE_TYPE r; + int i; + + if (! 
ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ for (i = 0; i < 5; i++)
+ if (real_identical (&r, &ext_80387_constants_table[i]))
+ return i + 3;
+ }
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_80387_constant_opcode (rtx x)
+{
+ switch (standard_80387_constant_p (x))
+ {
+ case 1:
+ return "fldz";
+ case 2:
+ return "fld1";
+ case 3:
+ return "fldlg2";
+ case 4:
+ return "fldln2";
+ case 5:
+ return "fldl2e";
+ case 6:
+ return "fldl2t";
+ case 7:
+ return "fldpi";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the CONST_DOUBLE representing the 80387 constant that is
+ loaded by the specified special instruction. The argument IDX
+ matches the return value from standard_80387_constant_p. */
+
+rtx
+standard_80387_constant_rtx (int idx)
+{
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ switch (idx)
+ {
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ i = idx - 3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
+ XFmode);
+}
+
+/* Return 1 if MODE is a valid mode for SSE. */
+static int
+standard_sse_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return 1 if X is an FP constant we can load into an SSE register
+ without using memory. */
+int
+standard_sse_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (vector_all_ones_operand (x, mode)
+ && standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -1;
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else if (get_attr_mode (insn) == MODE_V2DF)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 2:
+ return "pcmpeqd\t%0, %0";
+ }
+ gcc_unreachable ();
+}
+
+/* Return 1 if OP contains a symbol reference. */
+
+int
+symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return 1 if it is appropriate to emit `ret' instructions in the
+ body of a function. Do this only if the epilogue is simple, needing a
+ couple of insns. Prior to reloading, we can't tell how many registers
+ must be saved, so return 0 then. Return 0 if there is no frame
+ marker to de-allocate. */
+
+int
+ix86_can_use_return_insn_p (void)
+{
+ struct ix86_frame frame;
+
+ if (! reload_completed || frame_pointer_needed)
+ return 0;
+
+ /* Don't allow more than 32768 bytes of arguments to be popped, since
+ that's all we can do with one instruction.
*/ + if (current_function_pops_args + && current_function_args_size >= 32768) + return 0; + + ix86_compute_frame_layout (&frame); + return frame.to_allocate == 0 && frame.nregs == 0; +} + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may + be accessed via the stack pointer) in functions that seem suitable. */ + +int +ix86_frame_pointer_required (void) +{ + /* If we accessed previous frames, then the generated code expects + to be able to access the saved ebp value in our frame. */ + if (cfun->machine->accesses_prev_frame) + return 1; + + /* Several x86 os'es need a frame pointer for other reasons, + usually pertaining to setjmp. */ + if (SUBTARGET_FRAME_POINTER_REQUIRED) + return 1; + + /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off + the frame pointer by default. Turn it back on now if we've not + got a leaf function. */ + if (TARGET_OMIT_LEAF_FRAME_POINTER + && (!current_function_is_leaf + || ix86_current_function_calls_tls_descriptor)) + return 1; + + if (current_function_profile) + return 1; + + return 0; +} + +/* Record that the current function accesses previous call frames. */ + +void +ix86_setup_frame_addresses (void) +{ + cfun->machine->accesses_prev_frame = 1; +} + +#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +/* APPLE LOCAL 5695218 */ +static GTY(()) int pic_labels_used; + +/* Fills in the label name that should be used for a pc thunk for + the given register. */ + +static void +get_pc_thunk_name (char name[32], unsigned int regno) +{ + gcc_assert (!TARGET_64BIT); + + /* APPLE LOCAL deep branch prediction pic-base. */ + if (USE_HIDDEN_LINKONCE || TARGET_MACHO) + sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); +} + + +/* This function generates code for -fpic that loads %ebx with + the return address of the caller and then returns. */ + +void +ix86_file_end (void) +{ + rtx xops[2]; + int regno; + + for (regno = 0; regno < 8; ++regno) + { + char name[32]; + + if (! 
((pic_labels_used >> regno) & 1)) + continue; + + get_pc_thunk_name (name, regno); + +#if TARGET_MACHO + if (TARGET_MACHO) + { + switch_to_section (darwin_sections[text_coal_section]); + fputs ("\t.weak_definition\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n\t.private_extern\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n", asm_out_file); + ASM_OUTPUT_LABEL (asm_out_file, name); + } + else +#endif + if (USE_HIDDEN_LINKONCE) + { + tree decl; + + decl = build_decl (FUNCTION_DECL, get_identifier (name), + error_mark_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + DECL_ONE_ONLY (decl) = 1; + + (*targetm.asm_out.unique_section) (decl, 0); + switch_to_section (get_named_section (decl, NULL, 0)); + + (*targetm.asm_out.globalize_label) (asm_out_file, name); + fputs ("\t.hidden\t", asm_out_file); + assemble_name (asm_out_file, name); + fputc ('\n', asm_out_file); + ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); + } + /* APPLE LOCAL begin deep branch prediction pic-base */ +#if TARGET_MACHO + else if (TARGET_MACHO) + { + switch_to_section (darwin_sections[text_coal_section]); + fputs (".weak_definition\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n.private_extern\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n", asm_out_file); + ASM_OUTPUT_LABEL (asm_out_file, name); + } +#endif + /* APPLE LOCAL end deep branch prediction pic-base */ + else + { + switch_to_section (text_section); + ASM_OUTPUT_LABEL (asm_out_file, name); + } + + xops[0] = gen_rtx_REG (SImode, regno); + xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); + output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); + output_asm_insn ("ret", xops); + } + + if (NEED_INDICATE_EXEC_STACK) + file_end_indicate_exec_stack (); +} + +/* Emit code for the SET_GOT patterns. */ + +const char * +output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) +{ + rtx xops[3]; + + xops[0] = dest; + xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); + + if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) + { + xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); + + if (!flag_pic) + output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); + else + /* APPLE LOCAL begin dwarf call/pop 5221468 */ + { + output_asm_insn ("call\t%a2", xops); + + /* If necessary, report the effect that the instruction has on + the unwind info. */ +#if defined (DWARF2_UNWIND_INFO) + if (flag_asynchronous_unwind_tables +#if !defined (HAVE_prologue) + && !ACCUMULATE_OUTGOING_ARGS +#endif + && dwarf2out_do_frame ()) + { + rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (-4))); + insn = make_insn_raw (insn); + RTX_FRAME_RELATED_P (insn) = 1; + dwarf2out_frame_debug (insn, true); + } +#endif + } + /* APPLE LOCAL end dwarf call/pop 5221468 */ + +#if TARGET_MACHO + /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This + is what will be referenced by the Mach-O PIC subsystem. */ + if (!label) + ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); +#endif + + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (XEXP (xops[2], 0))); + + if (flag_pic) + /* APPLE LOCAL begin dwarf call/pop 5221468 */ + { + output_asm_insn ("pop{l}\t%0", xops); + + /* If necessary, report the effect that the instruction has on + the unwind info. We've already done this for delay slots + and call instructions. 
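+
+     (The pop below loads the return address pushed by the call above
+     into the PIC base register and removes it from the stack, so the
+     net %esp adjustment reported to the unwinder is +4.)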
*/
+#if defined (DWARF2_UNWIND_INFO)
+ if (flag_asynchronous_unwind_tables
+#if !defined (HAVE_prologue)
+ && !ACCUMULATE_OUTGOING_ARGS
+#endif
+ && dwarf2out_do_frame ())
+ {
+ rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (4)));
+ insn = make_insn_raw (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ }
+#endif
+ }
+ /* APPLE LOCAL end dwarf call/pop 5221468 */
+ }
+ else
+ {
+ char name[32];
+ get_pc_thunk_name (name, REGNO (dest));
+ pic_labels_used |= 1 << REGNO (dest);
+
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ xops[2] = gen_rtx_MEM (QImode, xops[2]);
+ output_asm_insn ("call\t%X2", xops);
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+#if TARGET_MACHO
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+ else
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+#endif
+ }
+
+ if (TARGET_MACHO)
+ return "";
+
+ if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
+ else
+ output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+
+ return "";
+}
+
+/* Generate a "push" pattern for input ARG. */
+
+static rtx
+gen_push (rtx arg)
+{
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ arg);
+}
+
+/* Return >= 0 if there is an unused call-clobbered register available
+ for the entire function. */
+
+static unsigned int
+ix86_select_alt_pic_regnum (void)
+{
+ if (current_function_is_leaf && !current_function_profile
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ int i;
+ for (i = 2; i >= 0; --i)
+ if (!regs_ever_live[i])
+ return i;
+ }
+
+ return INVALID_REGNUM;
+}
+
+/* APPLE LOCAL begin 5695218 */
+/* Reload may introduce references to the PIC base register
+ that do not directly reference pic_offset_table_rtx.
+ In the rare event we choose an alternate PIC register,
+ walk all the insns and rewrite every reference. */
+/* Run through the insns, changing references to the original
+ PIC_OFFSET_TABLE_REGNUM to our new one. */
+static void
+ix86_globally_replace_pic_reg (unsigned int new_pic_regno)
+{
+ rtx insn;
+ const int nregs = PIC_OFFSET_TABLE_REGNUM + 1;
+ rtx reg_map[FIRST_PSEUDO_REGISTER];
+ memset (reg_map, 0, nregs * sizeof (rtx));
+ pic_offset_table_rtx = gen_rtx_REG (SImode, new_pic_regno);
+ reg_map[REAL_PIC_OFFSET_TABLE_REGNUM] = pic_offset_table_rtx;
+
+ push_topmost_sequence ();
+ for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
+ {
+ replace_regs (PATTERN (insn), reg_map, nregs, 1);
+ replace_regs (REG_NOTES (insn), reg_map, nregs, 1);
+ }
+#if defined (TARGET_TOC)
+ else if (GET_CODE (insn) == CALL_INSN)
+ {
+ if ( !SIBLING_CALL_P (insn))
+ abort ();
+ }
+#endif
+ }
+ pop_topmost_sequence ();
+
+ regs_ever_live[new_pic_regno] = 1;
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 0;
+#if defined (TARGET_TOC)
+ cfun->machine->substitute_pic_base_reg = new_pic_regno;
+#endif
+}
+/* APPLE LOCAL end 5695218 */
+
+/* Return 1 if we need to save REGNO. */
+static int
+ix86_save_reg (unsigned int regno, int maybe_eh_return)
+{
+ /* APPLE LOCAL begin CW asm blocks */
+ /* For an asm function, we don't save any registers; instead, the
+ user is responsible.
*/ + if (cfun->iasm_asm_function) + return 0; + /* APPLE LOCAL end CW asm blocks */ + + if (pic_offset_table_rtx + && regno == REAL_PIC_OFFSET_TABLE_REGNUM + /* APPLE LOCAL begin 5695218 */ + && (current_function_uses_pic_offset_table + || current_function_profile + || current_function_calls_eh_return + || current_function_uses_const_pool)) + /* APPLE LOCAL end 5695218 */ + { + if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) + return 0; + return 1; + } + + if (current_function_calls_eh_return && maybe_eh_return) + { + unsigned i; + for (i = 0; ; i++) + { + unsigned test = EH_RETURN_DATA_REGNO (i); + if (test == INVALID_REGNUM) + break; + if (test == regno) + return 1; + } + } + + if (cfun->machine->force_align_arg_pointer + && regno == REGNO (cfun->machine->force_align_arg_pointer)) + return 1; + + /* APPLE LOCAL begin 5695218 */ + /* In order to get accurate usage info for the PIC register, we've + been forced to break and un-break the call_used_regs and + fixed_regs vectors. Ignore them when considering the PIC + register. */ + if (regno == REAL_PIC_OFFSET_TABLE_REGNUM + && regs_ever_live[regno]) + return 1; + /* APPLE LOCAL end 5695218 */ + + return (regs_ever_live[regno] + && !call_used_regs[regno] + && !fixed_regs[regno] + && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); +} + +/* Return number of registers to be saved on the stack. */ + +static int +ix86_nsaved_regs (void) +{ + int nregs = 0; + int regno; + + for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) + if (ix86_save_reg (regno, true)) + nregs++; + return nregs; +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +ix86_initial_elimination_offset (int from, int to) +{ + struct ix86_frame frame; + ix86_compute_frame_layout (&frame); + + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return frame.hard_frame_pointer_offset; + else if (from == FRAME_POINTER_REGNUM + && to == HARD_FRAME_POINTER_REGNUM) + return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; + else + { + gcc_assert (to == STACK_POINTER_REGNUM); + + if (from == ARG_POINTER_REGNUM) + return frame.stack_pointer_offset; + + gcc_assert (from == FRAME_POINTER_REGNUM); + return frame.stack_pointer_offset - frame.frame_pointer_offset; + } +} + +/* Fill structure ix86_frame about frame of currently computed function. */ + +static void +ix86_compute_frame_layout (struct ix86_frame *frame) +{ + HOST_WIDE_INT total_size; + unsigned int stack_alignment_needed; + HOST_WIDE_INT offset; + unsigned int preferred_alignment; + HOST_WIDE_INT size = get_frame_size (); + + frame->nregs = ix86_nsaved_regs (); + total_size = size; + + stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; + preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; + + /* During reload iteration the amount of registers saved can change. + Recompute the value as needed. Do not recompute when amount of registers + didn't change as reload does multiple calls to the function and does not + expect the decision to change within single iteration. */ + if (!optimize_size + && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) + { + int count = frame->nregs; + + cfun->machine->use_fast_prologue_epilogue_nregs = count; + /* The fast prologue uses move instead of push to save registers. 
This
+ is significantly longer, but also executes faster as modern hardware
+ can execute the moves in parallel, but can't do that for push/pop.
+
+ Be careful about choosing which prologue to emit: when the function
+ takes many instructions to execute, we may end up using the slow
+ version, as we also do when the function is known to be outside a
+ hot spot (this is known with profile feedback only). Weight the size
+ of the function by the number of registers to save, as it is cheap to
+ use one or two push instructions but very slow to use many of them. */
+ if (count)
+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
+ if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ || (flag_branch_probabilities
+ && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ cfun->machine->use_fast_prologue_epilogue = false;
+ else
+ cfun->machine->use_fast_prologue_epilogue
+ = !expensive_function_p (count);
+ }
+ if (TARGET_PROLOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue)
+ frame->save_regs_using_mov = true;
+ else
+ frame->save_regs_using_mov = false;
+
+
+ /* Skip the return address and the saved base pointer. */
+ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
+
+ frame->hard_frame_pointer_offset = offset;
+
+ /* Do some sanity checking of stack_alignment_needed and
+ preferred_alignment, since the i386 port is the only one using these
+ features, and they may break easily. */
+
+ gcc_assert (!size || stack_alignment_needed);
+ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (stack_alignment_needed
+ <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+
+ if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
+ stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
+
+ /* Register save area */
+ offset += frame->nregs * UNITS_PER_WORD;
+
+ /* Va-arg area */
+ if (ix86_save_varrargs_registers)
+ {
+ offset += X86_64_VARARGS_SIZE;
+ frame->va_arg_size = X86_64_VARARGS_SIZE;
+ }
+ else
+ frame->va_arg_size = 0;
+
+ /* Align start of frame for local function. */
+ frame->padding1 = ((offset + stack_alignment_needed - 1)
+ & -stack_alignment_needed) - offset;
+
+ offset += frame->padding1;
+
+ /* The frame pointer points here. */
+ frame->frame_pointer_offset = offset;
+
+ offset += size;
+
+ /* Add the outgoing arguments area. This can be skipped if we have
+ eliminated all the function calls as dead code.
+ Skipping is however impossible when the function calls alloca: the
+ alloca expander assumes that the last current_function_outgoing_args_size
+ bytes of the stack frame are unused. */
+ if (ACCUMULATE_OUTGOING_ARGS
+ && (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor))
+ {
+ offset += current_function_outgoing_args_size;
+ frame->outgoing_arguments_size = current_function_outgoing_args_size;
+ }
+ else
+ frame->outgoing_arguments_size = 0;
+
+ /* Align the stack boundary. This is only needed if we're calling another
+ function or using alloca. */
+ if (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
+ frame->padding2 = ((offset + preferred_alignment - 1)
+ & -preferred_alignment) - offset;
+ else
+ frame->padding2 = 0;
+
+ offset += frame->padding2;
+
+ /* We've reached the end of the stack frame. */
+ frame->stack_pointer_offset = offset;
+
+ /* The size the prologue needs to allocate.
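+
+     As a sketch of the layout computed above (top of frame first,
+     addresses decreasing):
+
+       return address
+       saved frame pointer      <- hard_frame_pointer_offset
+       saved registers
+       va-arg save area
+       padding1
+       local variables          <- frame_pointer_offset
+       outgoing arguments
+       padding2                 <- stack_pointer_offset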
*/
+ frame->to_allocate =
+ (size + frame->padding1 + frame->padding2
+ + frame->outgoing_arguments_size + frame->va_arg_size);
+
+ if ((!frame->to_allocate && frame->nregs <= 1)
+ || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+ frame->save_regs_using_mov = false;
+
+ if (TARGET_RED_ZONE && current_function_sp_is_unchanging
+ && current_function_is_leaf
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ frame->red_zone_size = frame->to_allocate;
+ if (frame->save_regs_using_mov)
+ frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
+ }
+ else
+ frame->red_zone_size = 0;
+ frame->to_allocate -= frame->red_zone_size;
+ frame->stack_pointer_offset -= frame->red_zone_size;
+#if 0
+ fprintf (stderr, "nregs: %i\n", frame->nregs);
+ fprintf (stderr, "size: %i\n", size);
+ fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
+ fprintf (stderr, "padding1: %i\n", frame->padding1);
+ fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
+ fprintf (stderr, "padding2: %i\n", frame->padding2);
+ fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
+ fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
+ fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
+ fprintf (stderr, "hard_frame_pointer_offset: %i\n",
+ frame->hard_frame_pointer_offset);
+ fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
+#endif
+}
+
+/* Emit code to save registers in the prologue. */
+
+static void
+ix86_emit_save_regs (void)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit code to save registers using MOV insns. The first register
+ is stored at POINTER + OFFSET. */
+static void
+ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
+ Pmode, offset),
+ gen_rtx_REG (Pmode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += UNITS_PER_WORD;
+ }
+}
+
+/* Expand a prologue or epilogue stack adjustment.
+ The pattern exists to put a dependency on all ebp-based memory accesses.
+ STYLE should be negative if instructions should be marked as frame related,
+ zero if the %r11 register is live and cannot be freely used, and positive
+ otherwise. */
+
+static void
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
+{
+ rtx insn;
+
+ if (! TARGET_64BIT)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+ else if (x86_64_immediate_operand (offset, DImode))
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+ else
+ {
+ rtx r11;
+ /* r11 is used by indirect sibcall return as well, set before the
+ epilogue and used after the epilogue. ATM indirect sibcall
+ shouldn't be used together with huge frame sizes in one
+ function because of the frame_size check in sibcall.c.
*/
+ gcc_assert (style);
+ r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+ /* APPLE LOCAL async unwind info 5949469 */
+ if (style < 0 /* || flag_asynchronous_unwind_tables*/)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
+ offset));
+ }
+ if (style < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* APPLE LOCAL begin async unwind info 5949350 5949469 */
+#if 0
+ else if (flag_asynchronous_unwind_tables
+ && (src == hard_frame_pointer_rtx
+ || src == stack_pointer_rtx))
+ RTX_FRAME_RELATED_P (insn) = 1;
+#endif
+ /* APPLE LOCAL end async unwind info 5949350 5949469 */
+}
+
+/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
+
+static rtx
+ix86_internal_arg_pointer (void)
+{
+ bool has_force_align_arg_pointer =
+ (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
+ if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+ && DECL_NAME (current_function_decl)
+ && MAIN_NAME_P (DECL_NAME (current_function_decl))
+ && DECL_FILE_SCOPE_P (current_function_decl))
+ || ix86_force_align_arg_pointer
+ || has_force_align_arg_pointer)
+ {
+ /* Nested functions can't realign the stack due to a register
+ conflict. */
+ if (DECL_CONTEXT (current_function_decl)
+ && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
+ {
+ if (ix86_force_align_arg_pointer)
+ warning (0, "-mstackrealign ignored for nested functions");
+ if (has_force_align_arg_pointer)
+ error ("%s not supported for nested functions",
+ ix86_force_align_arg_pointer_string);
+ return virtual_incoming_args_rtx;
+ }
+ cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
+ return copy_to_reg (cfun->machine->force_align_arg_pointer);
+ }
+ else
+ return virtual_incoming_args_rtx;
+}
+
+/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
+ This is called from dwarf2out.c to emit call frame instructions
+ for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
+static void
+ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
+{
+ rtx unspec = SET_SRC (pattern);
+ gcc_assert (GET_CODE (unspec) == UNSPEC);
+
+ switch (index)
+ {
+ case UNSPEC_REG_SAVE:
+ dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
+ SET_DEST (pattern));
+ break;
+ case UNSPEC_DEF_CFA:
+ dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
+ INTVAL (XVECEXP (unspec, 0, 0)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* APPLE LOCAL begin 3399553 */
+/* Calculate the value of FLT_ROUNDS into DEST.
+
+ The rounding mode is in bits 11:10 of the x87 floating-point control
+ word (FPCW), and has the following settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
+*/
+void
+ix86_expand_flt_rounds (rtx dest)
+{
+ rtx mem = assign_stack_temp (HImode, GET_MODE_SIZE (HImode), 0);
+ rtx temp = gen_reg_rtx (SImode);
+
+ /* Step #1: Read the FPCW. Unfortunately, this can only be done into a
+ 16-bit memory location. */
+ emit_insn (gen_x86_fnstcw_1 (mem));
+
+ /* Step #2: Copy it into a register. */
+ emit_insn (gen_zero_extendhisi2 (dest, mem));
+
+ /* Step #3: Perform the conversion described above.
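+
+     As a worked check: RC bits 11:10 = 00 (nearest) give
+     ((0 | 0) + 1) & 3 = 1; 01 (-inf) gives ((0 | 2) + 1) & 3 = 3;
+     10 (+inf) gives ((1 | 0) + 1) & 3 = 2; and 11 (to zero) gives
+     ((1 | 2) + 1) & 3 = 0, matching the FLT_ROUNDS table above.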
*/
+ emit_insn (gen_andsi3 (temp, dest, GEN_INT (0x400)));
+ emit_insn (gen_andsi3 (dest, dest, GEN_INT (0x800)));
+ emit_insn (gen_lshrsi3 (temp, temp, GEN_INT (9)));
+ emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (11)));
+ emit_insn (gen_iorsi3 (dest, dest, temp));
+ emit_insn (gen_addsi3 (dest, dest, const1_rtx));
+ emit_insn (gen_andsi3 (dest, dest, GEN_INT (3)));
+}
+/* APPLE LOCAL end 3399553 */
+
+/* APPLE LOCAL begin fix-and-continue x86 */
+#ifndef TARGET_FIX_AND_CONTINUE
+#define TARGET_FIX_AND_CONTINUE 0
+#endif
+/* APPLE LOCAL end fix-and-continue x86 */
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+ix86_expand_prologue (void)
+{
+ rtx insn;
+ bool pic_reg_used;
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+
+ /* APPLE LOCAL begin fix-and-continue x86 */
+ if (TARGET_FIX_AND_CONTINUE)
+ {
+ /* gdb on darwin arranges to forward a function from the old
+ address by modifying the first 6 instructions of the function
+ to branch to the overriding function. This is necessary to
+ permit function pointers that point to the old function to
+ actually forward to the new function. */
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ }
+ /* APPLE LOCAL end fix-and-continue x86 */
+
+ ix86_compute_frame_layout (&frame);
+
+ if (cfun->machine->force_align_arg_pointer)
+ {
+ rtx x, y;
+
+ /* Grab the argument pointer. */
+ x = plus_constant (stack_pointer_rtx, 4);
+ y = cfun->machine->force_align_arg_pointer;
+ insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* The unwind info consists of two parts: install the fafp as the cfa,
+ and record the fafp as the "save register" of the stack pointer.
+ The latter is there so that the unwinder can see where it
+ should restore the stack pointer across the AND insn. */
+ x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
+ x = gen_rtx_SET (VOIDmode, y, x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
+ UNSPEC_REG_SAVE);
+ y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
+ RTX_FRAME_RELATED_P (y) = 1;
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
+ x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
+ REG_NOTES (insn) = x;
+
+ /* Align the stack. */
+ emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-16)));
+
+ /* And here we cheat like madmen with the unwind info. We force the
+ cfa register back to sp+4, which is exactly what it was at the
+ start of the function. Re-pushing the return address results in
+ the return at the same spot relative to the cfa, and thus is
+ correct wrt the unwind info. */
+ x = cfun->machine->force_align_arg_pointer;
+ x = gen_frame_mem (Pmode, plus_constant (x, -4));
+ insn = emit_insn (gen_push (x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ x = GEN_INT (4);
+ x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
+ REG_NOTES (insn) = x;
+ }
+
+ /* Note: AT&T enter does NOT have reversed args. Enter is probably
+ slower on all targets. Also sdb doesn't like it.
*/
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ allocate = frame.to_allocate;
+
+ if (!frame.save_regs_using_mov)
+ ix86_emit_save_regs ();
+ else
+ allocate += frame.nregs * UNITS_PER_WORD;
+
+ /* When using the red zone we may start saving registers before allocating
+ the stack frame, saving one cycle of the prologue. */
+ if (TARGET_RED_ZONE && frame.save_regs_using_mov)
+ ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
+ : stack_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+
+ if (allocate == 0)
+ ;
+ else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
+ /* APPLE LOCAL begin CW asm blocks */
+ {
+ if (! cfun->iasm_asm_function)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate), -1);
+ }
+ /* APPLE LOCAL end CW asm blocks */
+ else
+ {
+ /* Only valid for Win32. */
+ rtx eax = gen_rtx_REG (SImode, 0);
+ bool eax_live = ix86_eax_live_at_start_p ();
+ rtx t;
+
+ gcc_assert (!TARGET_64BIT);
+
+ if (eax_live)
+ {
+ emit_insn (gen_push (eax));
+ allocate -= 4;
+ }
+
+ emit_move_insn (eax, GEN_INT (allocate));
+
+ insn = emit_insn (gen_allocate_stack_worker (eax));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ t, REG_NOTES (insn));
+
+ if (eax_live)
+ {
+ if (frame_pointer_needed)
+ t = plus_constant (hard_frame_pointer_rtx,
+ allocate
+ - frame.to_allocate
+ - frame.nregs * UNITS_PER_WORD);
+ else
+ t = plus_constant (stack_pointer_rtx, allocate);
+ emit_move_insn (eax, gen_rtx_MEM (SImode, t));
+ }
+ }
+
+ if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
+ {
+ if (!frame_pointer_needed || !frame.to_allocate)
+ ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+ else
+ ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+ }
+
+ pic_reg_used = false;
+ /* APPLE LOCAL begin 5695218 */
+ if (pic_offset_table_rtx && regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
+ && !TARGET_64BIT)
+ {
+ unsigned int alt_pic_reg_used;
+
+ alt_pic_reg_used = ix86_select_alt_pic_regnum ();
+ /* APPLE LOCAL end 5695218 */
+
+ if (alt_pic_reg_used != INVALID_REGNUM)
+ /* APPLE LOCAL begin 5695218 */
+ /* REGNO (pic_offset_table_rtx) = alt_pic_reg_used; */
+ ix86_globally_replace_pic_reg (alt_pic_reg_used);
+ /* APPLE LOCAL end 5695218 */
+
+ pic_reg_used = true;
+ }
+
+ if (pic_reg_used)
+ {
+ if (TARGET_64BIT)
+ insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+ else
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+
+ /* Even with accurate pre-reload life analysis, we can wind up
+ deleting all references to the pic register after reload.
+ Consider if cross-jumping unifies two sides of a branch
+ controlled by a comparison vs the only read from a global.
+ In which case, allow the set_got to be deleted, though we're
+ too late to do anything about the ebx save in the prologue. */
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
+ }
+
+ /* Prevent function calls from being scheduled before the call to mcount.
+ In the pic_reg_used case, make sure that the got load isn't deleted. */
+ if (current_function_profile)
+ emit_insn (gen_blockage (pic_reg_used ?
pic_offset_table_rtx : const0_rtx)); +} + +/* Emit code to restore saved registers using MOV insns. First register + is restored from POINTER + OFFSET. */ +static void +ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, + int maybe_eh_return) +{ + int regno; + rtx base_address = gen_rtx_MEM (Pmode, pointer); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (ix86_save_reg (regno, maybe_eh_return)) + { + /* Ensure that adjust_address won't be forced to produce pointer + out of range allowed by x86-64 instruction set. */ + if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) + { + rtx r11; + + r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); + emit_move_insn (r11, GEN_INT (offset)); + emit_insn (gen_adddi3 (r11, r11, pointer)); + base_address = gen_rtx_MEM (Pmode, r11); + offset = 0; + } + emit_move_insn (gen_rtx_REG (Pmode, regno), + adjust_address (base_address, Pmode, offset)); + offset += UNITS_PER_WORD; + } +} + +/* Restore function stack, frame, and registers. */ + +void +ix86_expand_epilogue (int style) +{ + int regno; + int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; + struct ix86_frame frame; + HOST_WIDE_INT offset; + + ix86_compute_frame_layout (&frame); + + /* Calculate start of saved registers relative to ebp. Special care + must be taken for the normal return case of a function using + eh_return: the eax and edx registers are marked as saved, but not + restored along this path. */ + offset = frame.nregs; + if (current_function_calls_eh_return && style != 2) + offset -= 2; + offset *= -UNITS_PER_WORD; + + /* APPLE LOCAL begin CW asm blocks */ + /* For an asm function, don't generate an epilogue. */ + if (cfun->iasm_asm_function) + { + emit_jump_insn (gen_return_internal ()); + return; + } + /* APPLE LOCAL end CW asm blocks */ + + /* If we're only restoring one register and sp is not valid then + using a move instruction to restore the register since it's + less work than reloading sp and popping the register. + + The default code result in stack adjustment using add/lea instruction, + while this code results in LEAVE instruction (or discrete equivalent), + so it is profitable in some other cases as well. Especially when there + are no registers to restore. We also use this code when TARGET_USE_LEAVE + and there is exactly one register to pop. This heuristic may need some + tuning in future. */ + if ((!sp_valid && frame.nregs <= 1) + || (TARGET_EPILOGUE_USING_MOVE + && cfun->machine->use_fast_prologue_epilogue + && (frame.nregs > 1 || frame.to_allocate)) + || (frame_pointer_needed && !frame.nregs && frame.to_allocate) + || (frame_pointer_needed && TARGET_USE_LEAVE + && cfun->machine->use_fast_prologue_epilogue + && frame.nregs == 1) + || current_function_calls_eh_return) + { + /* Restore registers. We can use ebp or esp to address the memory + locations. If both are available, default to ebp, since offsets + are known to be small. Only exception is esp pointing directly to the + end of block of saved registers, where we may simplify addressing + mode. */ + + if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) + ix86_emit_restore_regs_using_mov (stack_pointer_rtx, + frame.to_allocate, style == 2); + else + ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, + offset, style == 2); + + /* eh_return epilogues need %ecx added to the stack pointer. 
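+
+   Editorial sketch, names invented, not from this patch: the
+   restore-using-mov path above is a base+offset walk over the saved
+   registers, one load apiece and no stack-pointer updates:
+
+     // illustration only: mask has one bit per saved register
+     void restore_regs (const long *base, unsigned mask, long regs[8])
+     {
+       int off = 0;
+       for (int r = 0; r < 8; r++)
+         if (mask & (1u << r))
+           regs[r] = base[off++];   // one mov per saved register
+     }
+
+   The pop-based path in the else branch below instead moves the stack
+   pointer as it goes, which is why it must deallocate the frame first.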
*/ + if (style == 2) + { + rtx tmp, sa = EH_RETURN_STACKADJ_RTX; + + if (frame_pointer_needed) + { + tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); + tmp = plus_constant (tmp, UNITS_PER_WORD); + emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); + + tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); + emit_move_insn (hard_frame_pointer_rtx, tmp); + + pro_epilogue_adjust_stack (stack_pointer_rtx, sa, + const0_rtx, style); + } + else + { + tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); + tmp = plus_constant (tmp, (frame.to_allocate + + frame.nregs * UNITS_PER_WORD)); + emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); + } + } + else if (!frame_pointer_needed) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (frame.to_allocate + + frame.nregs * UNITS_PER_WORD), + style); + /* If not an i386, mov & pop is faster than "leave". */ + else if (TARGET_USE_LEAVE || optimize_size + || !cfun->machine->use_fast_prologue_epilogue) + emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); + else + { + pro_epilogue_adjust_stack (stack_pointer_rtx, + hard_frame_pointer_rtx, + const0_rtx, style); + if (TARGET_64BIT) + emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); + else + emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); + } + } + else + { + /* First step is to deallocate the stack frame so that we can + pop the registers. */ + if (!sp_valid) + { + gcc_assert (frame_pointer_needed); + pro_epilogue_adjust_stack (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (offset), style); + } + else if (frame.to_allocate) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (frame.to_allocate), style); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (ix86_save_reg (regno, false)) + { + if (TARGET_64BIT) + emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); + else + emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); + } + if (frame_pointer_needed) + { + /* Leave results in shorter dependency chains on CPUs that are + able to grok it fast. */ + if (TARGET_USE_LEAVE) + emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); + else if (TARGET_64BIT) + emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); + else + emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); + } + } + + if (cfun->machine->force_align_arg_pointer) + { + emit_insn (gen_addsi3 (stack_pointer_rtx, + cfun->machine->force_align_arg_pointer, + GEN_INT (-4))); + } + + /* Sibcall epilogues don't want a return instruction. */ + if (style == 0) + return; + + if (current_function_pops_args && current_function_args_size) + { + rtx popc = GEN_INT (current_function_pops_args); + + /* i386 can only pop 64K bytes. If asked to pop more, pop + return address, do explicit add, and jump indirectly to the + caller. */ + + if (current_function_pops_args >= 65536) + { + rtx ecx = gen_rtx_REG (SImode, 2); + + /* There is no "pascal" calling convention in 64bit ABI. */ + gcc_assert (!TARGET_64BIT); + + emit_insn (gen_popsi1 (ecx)); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); + emit_jump_insn (gen_return_indirect_internal (ecx)); + } + else + emit_jump_insn (gen_return_pop_internal (popc)); + } + else + emit_jump_insn (gen_return_internal ()); +} + +/* Reset from the function's potential modifications. 
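+
+   Editorial aside on the 64K case above: the "ret $imm" form encodes
+   its pop count in a 16-bit immediate, hence the explicit cutoff.  A
+   one-line sketch of that encodability test, helper name invented:
+
+     // true iff the count fits ret's 16-bit immediate field
+     static inline int ret_imm16_ok (long n) { return n >= 0 && n < 65536; }
+
+   At or above 65536 bytes the code above instead pops the return
+   address into %ecx, adds the byte count to %esp explicitly, and
+   returns with an indirect jump through %ecx.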
*/ + +static void +ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + if (pic_offset_table_rtx) + REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; +#if TARGET_MACHO + /* Mach-O doesn't support labels at the end of objects, so if + it looks like we might want one, insert a NOP. */ + { + rtx insn = get_last_insn (); + while (insn + && NOTE_P (insn) + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL) + insn = PREV_INSN (insn); + if (insn + && (LABEL_P (insn) + || (NOTE_P (insn) + && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))) + fputs ("\tnop\n", file); + } +#endif + +} + +/* Extract the parts of an RTL expression that is a valid memory address + for an instruction. Return 0 if the structure of the address is + grossly off. Return -1 if the address contains ASHIFT, so it is not + strictly valid, but still used for computing length of lea instruction. */ + +int +ix86_decompose_address (rtx addr, struct ix86_address *out) +{ + rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; + rtx base_reg, index_reg; + HOST_WIDE_INT scale = 1; + rtx scale_rtx = NULL_RTX; + int retval = 1; + enum ix86_address_seg seg = SEG_DEFAULT; + + if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG) + base = addr; + else if (GET_CODE (addr) == PLUS) + { + rtx addends[4], op; + int n = 0, i; + + op = addr; + do + { + if (n >= 4) + return 0; + addends[n++] = XEXP (op, 1); + op = XEXP (op, 0); + } + while (GET_CODE (op) == PLUS); + if (n >= 4) + return 0; + addends[n] = op; + + for (i = n; i >= 0; --i) + { + op = addends[i]; + switch (GET_CODE (op)) + { + case MULT: + if (index) + return 0; + index = XEXP (op, 0); + scale_rtx = XEXP (op, 1); + break; + + case UNSPEC: + if (XINT (op, 1) == UNSPEC_TP + && TARGET_TLS_DIRECT_SEG_REFS + && seg == SEG_DEFAULT) + seg = TARGET_64BIT ? SEG_FS : SEG_GS; + else + return 0; + break; + + case REG: + case SUBREG: + if (!base) + base = op; + else if (!index) + index = op; + else + return 0; + break; + + case CONST: + case CONST_INT: + case SYMBOL_REF: + case LABEL_REF: + if (disp) + return 0; + disp = op; + break; + + default: + return 0; + } + } + } + else if (GET_CODE (addr) == MULT) + { + index = XEXP (addr, 0); /* index*scale */ + scale_rtx = XEXP (addr, 1); + } + else if (GET_CODE (addr) == ASHIFT) + { + rtx tmp; + + /* We're called for lea too, which implements ashift on occasion. */ + index = XEXP (addr, 0); + tmp = XEXP (addr, 1); + if (GET_CODE (tmp) != CONST_INT) + return 0; + scale = INTVAL (tmp); + if ((unsigned HOST_WIDE_INT) scale > 3) + return 0; + scale = 1 << scale; + retval = -1; + } + else + disp = addr; /* displacement */ + + /* Extract the integral value of scale. */ + if (scale_rtx) + { + if (GET_CODE (scale_rtx) != CONST_INT) + return 0; + scale = INTVAL (scale_rtx); + } + + base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; + index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; + + /* Allow arg pointer and stack pointer as index if there is not scaling. */ + if (base_reg && index_reg && scale == 1 + && (index_reg == arg_pointer_rtx + || index_reg == frame_pointer_rtx + || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) + { + rtx tmp; + tmp = base, base = index, index = tmp; + tmp = base_reg, base_reg = index_reg, index_reg = tmp; + } + + /* Special case: %ebp cannot be encoded as a base without a displacement. 
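+
+   Editorial note, not from the patch: in the ModR/M byte, mod=00 with
+   r/m=101 (the %ebp slot) is reused to mean "disp32, no base", so a
+   bare [%ebp] is unencodable and the code below forces in a zero
+   displacement:
+
+     // [%ebp]    must be emitted as [%ebp+0]  (mod=01 with disp8 = 0)
+     // [disp32]  is what mod=00, r/m=101 actually encodes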
*/ + if ((base_reg == hard_frame_pointer_rtx + || base_reg == frame_pointer_rtx + || base_reg == arg_pointer_rtx) && !disp) + disp = const0_rtx; + + /* Special case: on K6, [%esi] makes the instruction vector decoded. + Avoid this by transforming to [%esi+0]. */ + if (ix86_tune == PROCESSOR_K6 && !optimize_size + && base_reg && !index_reg && !disp + && REG_P (base_reg) + && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) + disp = const0_rtx; + + /* Special case: encode reg+reg instead of reg*2. */ + if (!base && index && scale && scale == 2) + base = index, base_reg = index_reg, scale = 1; + + /* Special case: scaling cannot be encoded without base or displacement. */ + if (!base && !disp && index && scale != 1) + disp = const0_rtx; + + out->base = base; + out->index = index; + out->disp = disp; + out->scale = scale; + out->seg = seg; + + return retval; +} + +/* Return cost of the memory address x. + For i386, it is better to use a complex address than let gcc copy + the address into a reg and make a new pseudo. But not if the address + requires to two regs - that would mean more pseudos with longer + lifetimes. */ +static int +ix86_address_cost (rtx x) +{ + struct ix86_address parts; + int cost = 1; + int ok = ix86_decompose_address (x, &parts); + + gcc_assert (ok); + + if (parts.base && GET_CODE (parts.base) == SUBREG) + parts.base = SUBREG_REG (parts.base); + if (parts.index && GET_CODE (parts.index) == SUBREG) + parts.index = SUBREG_REG (parts.index); + + /* More complex memory references are better. */ + if (parts.disp && parts.disp != const0_rtx) + cost--; + if (parts.seg != SEG_DEFAULT) + cost--; + + /* Attempt to minimize number of registers in the address. */ + if ((parts.base + && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) + || (parts.index + && (!REG_P (parts.index) + || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) + cost++; + + if (parts.base + && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) + && parts.index + && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) + && parts.base != parts.index) + cost++; + + /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, + since it's predecode logic can't detect the length of instructions + and it degenerates to vector decoded. Increase cost of such + addresses here. The penalty is minimally 2 cycles. It may be worthwhile + to split such addresses or even refuse such addresses at all. + + Following addressing modes are affected: + [base+scale*index] + [scale*index+disp] + [base+index] + + The first and last case may be avoidable by explicitly coding the zero in + memory address, but I don't have AMD-K6 machine handy to check this + theory. */ + + if (TARGET_K6 + && ((!parts.disp && parts.base && parts.index && parts.scale != 1) + || (parts.disp && !parts.base && parts.index && parts.scale != 1) + || (!parts.disp && parts.base && parts.index && parts.scale == 1))) + cost += 10; + + return cost; +} + +/* If X is a machine specific address (i.e. a symbol or label being + referenced as a displacement from the GOT implemented using an + UNSPEC), then return the base term. Otherwise return X. 
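+
+   Editorial worked example for ix86_address_cost above, values taken
+   from the code, not part of the patch: starting from cost 1, a real
+   displacement or a segment override subtracts one, and pseudo
+   registers add:
+
+     // [%ebx + 8]           ->  1 - 1      = 0   (one hard reg)
+     // [pseudo1 + pseudo2]  ->  1 + 1 + 1  = 3   (two long-lived regs)
+
+   so complex one-register addresses are preferred over forms that tie
+   up two registers.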
*/ + +rtx +ix86_find_base_term (rtx x) +{ + rtx term; + + if (TARGET_64BIT) + { + if (GET_CODE (x) != CONST) + return x; + term = XEXP (x, 0); + if (GET_CODE (term) == PLUS + && (GET_CODE (XEXP (term, 1)) == CONST_INT + || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) + term = XEXP (term, 0); + if (GET_CODE (term) != UNSPEC + || XINT (term, 1) != UNSPEC_GOTPCREL) + return x; + + term = XVECEXP (term, 0, 0); + + if (GET_CODE (term) != SYMBOL_REF + && GET_CODE (term) != LABEL_REF) + return x; + + return term; + } + + term = ix86_delegitimize_address (x); + + if (GET_CODE (term) != SYMBOL_REF + && GET_CODE (term) != LABEL_REF) + return x; + + return term; +} + +/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as + this is used for to form addresses to local data when -fPIC is in + use. */ + +static bool +darwin_local_data_pic (rtx disp) +{ + if (GET_CODE (disp) == MINUS) + { + if (GET_CODE (XEXP (disp, 0)) == LABEL_REF + || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF) + if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF) + { + const char *sym_name = XSTR (XEXP (disp, 1), 0); + if (! strcmp (sym_name, "<pic base>")) + return true; + } + } + + return false; +} + +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +bool +legitimate_constant_p (rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + x = XEXP (x, 0); + } + + if (TARGET_MACHO && darwin_local_data_pic (x)) + return true; + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + case UNSPEC_GOTOFF: + return TARGET_64BIT; + case UNSPEC_TPOFF: + case UNSPEC_NTPOFF: + x = XVECEXP (x, 0, 0); + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_DTPOFF: + x = XVECEXP (x, 0, 0); + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); + default: + return false; + } + + /* We must have drilled down to a symbol. */ + if (GET_CODE (x) == LABEL_REF) + return true; + if (GET_CODE (x) != SYMBOL_REF) + return false; + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. */ + if (SYMBOL_REF_TLS_MODEL (x)) + return false; + /* APPLE LOCAL begin dynamic-no-pic */ +#if TARGET_MACHO + if (TARGET_MACHO && MACHO_DYNAMIC_NO_PIC_P) + return machopic_symbol_defined_p (x); +#endif + break; + + case PLUS: + { + rtx left = XEXP (x, 0); + rtx right = XEXP (x, 1); + bool left_is_constant = legitimate_constant_p (left); + bool right_is_constant = legitimate_constant_p (right); + return left_is_constant && right_is_constant; + } + break; + /* APPLE LOCAL end dynamic-no-pic */ + + case CONST_DOUBLE: + if (GET_MODE (x) == TImode + && x != CONST0_RTX (TImode) + && !TARGET_64BIT) + return false; + break; + + case CONST_VECTOR: + /* APPLE LOCAL begin radar 4874197 mainline candidate */ + if (standard_sse_constant_p (x)) + /* APPLE LOCAL end radar 4874197 mainline candidate */ + return true; + return false; + + default: + break; + } + + /* Otherwise we handle everything else in the move patterns. */ + return true; +} + +/* Determine if it's legal to put X into the constant pool. This + is not possible for the address of thread-local symbols, which + is checked above. */ + +static bool +ix86_cannot_force_const_mem (rtx x) +{ + /* We can always put integral constants and vectors in memory. 
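+
+   Editorial examples for legitimate_constant_p above, illustrative
+   rather than exhaustive:
+
+     // &sym          OK unless sym is thread-local (TLS never valid)
+     // &sym + 12     OK: CONST of SYMBOL_REF plus CONST_INT
+     // TImode zero   OK; any other TImode CONST_DOUBLE needs 64-bit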
*/ + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_DOUBLE: + case CONST_VECTOR: + return false; + + default: + break; + } + return !legitimate_constant_p (x); +} + +/* Determine if a given RTX is a valid constant address. */ + +bool +constant_address_p (rtx x) +{ + return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); +} + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ + +bool +legitimate_pic_operand_p (rtx x) +{ + rtx inner; + + switch (GET_CODE (x)) + { + case CONST: + inner = XEXP (x, 0); + if (GET_CODE (inner) == PLUS + && GET_CODE (XEXP (inner, 1)) == CONST_INT) + inner = XEXP (inner, 0); + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (inner) == UNSPEC) + switch (XINT (inner, 1)) + { + case UNSPEC_GOTOFF: + return TARGET_64BIT; + case UNSPEC_TPOFF: + x = XVECEXP (inner, 0, 0); + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + default: + return false; + } + /* FALLTHRU */ + + case SYMBOL_REF: + case LABEL_REF: + return legitimate_pic_address_disp_p (x); + + default: + return true; + } +} + +/* Determine if a given CONST RTX is a valid memory displacement + in PIC mode. */ + +int +legitimate_pic_address_disp_p (rtx disp) +{ + bool saw_plus; + + /* In 64bit mode we can allow direct addresses of symbols and labels + when they are not dynamic symbols. */ + if (TARGET_64BIT) + { + rtx op0 = disp, op1; + + switch (GET_CODE (disp)) + { + case LABEL_REF: + return true; + + case CONST: + if (GET_CODE (XEXP (disp, 0)) != PLUS) + break; + op0 = XEXP (XEXP (disp, 0), 0); + op1 = XEXP (XEXP (disp, 0), 1); + if (GET_CODE (op1) != CONST_INT + || INTVAL (op1) >= 16*1024*1024 + || INTVAL (op1) < -16*1024*1024) + break; + if (GET_CODE (op0) == LABEL_REF) + return true; + if (GET_CODE (op0) != SYMBOL_REF) + break; + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS references should always be enclosed in UNSPEC. */ + if (SYMBOL_REF_TLS_MODEL (op0)) + return false; + /* APPLE LOCAL begin fix-and-continue 6227434 */ +#if TARGET_MACHO + if (machopic_data_defined_p (op0)) + return true; + + /* Under -mfix-and-continue, even local storage is + addressed via the GOT, so that the value of local + statics is preserved when a function is "fixed." */ + if (indirect_data (op0)) + return false; +#endif + /* APPLE LOCAL end fix-and-continue 6227434 */ + if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)) + return true; + break; + + default: + break; + } + } + if (GET_CODE (disp) != CONST) + return 0; + disp = XEXP (disp, 0); + + if (TARGET_64BIT) + { + /* We are unsafe to allow PLUS expressions. This limit allowed distance + of GOT tables. We should not need these anyway. 
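+
+   Editorial note on the 16*1024*1024 bounds above: symbol+offset is
+   accepted only while the addend stays in a signed 25-bit window,
+   i.e. -2**24 <= offset < 2**24.  A sketch of the same test, helper
+   name invented:
+
+     // matches the INTVAL range checks in the CONST case above
+     static inline int pic64_offset_ok (long off)
+     {
+       return off >= -16L*1024*1024 && off < 16L*1024*1024;
+     }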
*/ + if (GET_CODE (disp) != UNSPEC + || (XINT (disp, 1) != UNSPEC_GOTPCREL + && XINT (disp, 1) != UNSPEC_GOTOFF)) + return 0; + + if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF + && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) + return 0; + return 1; + } + + saw_plus = false; + if (GET_CODE (disp) == PLUS) + { + if (GET_CODE (XEXP (disp, 1)) != CONST_INT) + return 0; + disp = XEXP (disp, 0); + saw_plus = true; + } + + if (TARGET_MACHO && darwin_local_data_pic (disp)) + return 1; + + if (GET_CODE (disp) != UNSPEC) + return 0; + + switch (XINT (disp, 1)) + { + case UNSPEC_GOT: + if (saw_plus) + return false; + return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF; + case UNSPEC_GOTOFF: + /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. + While ABI specify also 32bit relocation but we don't produce it in + small PIC model at all. */ + if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF + || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) + && !TARGET_64BIT) + return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); + return false; + case UNSPEC_GOTTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + if (saw_plus) + return false; + disp = XVECEXP (disp, 0, 0); + return (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); + case UNSPEC_NTPOFF: + disp = XVECEXP (disp, 0, 0); + return (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_DTPOFF: + disp = XVECEXP (disp, 0, 0); + return (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); + } + + return 0; +} + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid + memory address for an instruction. The MODE argument is the machine mode + for the MEM expression that wants to use this address. + + It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should + convert common non-canonical forms to canonical form so that they will + be recognized. */ + +int +legitimate_address_p (enum machine_mode mode, rtx addr, int strict) +{ + struct ix86_address parts; + rtx base, index, disp; + HOST_WIDE_INT scale; + const char *reason = NULL; + rtx reason_rtx = NULL_RTX; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", + GET_MODE_NAME (mode), strict); + debug_rtx (addr); + } + + if (ix86_decompose_address (addr, &parts) <= 0) + { + reason = "decomposition failed"; + goto report_error; + } + + base = parts.base; + index = parts.index; + disp = parts.disp; + scale = parts.scale; + + /* Validate base register. + + Don't allow SUBREG's that span more than a word here. It can lead to spill + failures when the base is one word out of a two word structure, which is + represented internally as a DImode int. */ + + if (base) + { + rtx reg; + reason_rtx = base; + + if (REG_P (base)) + reg = base; + else if (GET_CODE (base) == SUBREG + && REG_P (SUBREG_REG (base)) + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) + <= UNITS_PER_WORD) + reg = SUBREG_REG (base); + else + { + reason = "base is not a register"; + goto report_error; + } + + if (GET_MODE (base) != Pmode) + { + reason = "base is not in Pmode"; + goto report_error; + } + + if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) + { + reason = "base is not valid"; + goto report_error; + } + } + + /* Validate index register. + + Don't allow SUBREG's that span more than a word here -- same as above. 
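+
+   Editorial illustration of the SUBREG rule, used for the index below
+   just as for the base above: the inner register must itself fit in
+   one word, so on a 32-bit target
+
+     // rejected:  (subreg:SI (reg:DI x) 0)
+     //            one SImode word out of a two-word DImode pseudo
+
+   since spilling such a pseudo can fail, as described above.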
*/ + + if (index) + { + rtx reg; + reason_rtx = index; + + if (REG_P (index)) + reg = index; + else if (GET_CODE (index) == SUBREG + && REG_P (SUBREG_REG (index)) + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) + <= UNITS_PER_WORD) + reg = SUBREG_REG (index); + else + { + reason = "index is not a register"; + goto report_error; + } + + if (GET_MODE (index) != Pmode) + { + reason = "index is not in Pmode"; + goto report_error; + } + + if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) + { + reason = "index is not valid"; + goto report_error; + } + } + + /* Validate scale factor. */ + if (scale != 1) + { + reason_rtx = GEN_INT (scale); + if (!index) + { + reason = "scale without index"; + goto report_error; + } + + if (scale != 2 && scale != 4 && scale != 8) + { + reason = "scale is not a valid multiplier"; + goto report_error; + } + } + + /* Validate displacement. */ + if (disp) + { + reason_rtx = disp; + + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC) + switch (XINT (XEXP (disp, 0), 1)) + { + /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when + used. While ABI specify also 32bit relocations, we don't produce + them at all and use IP relative instead. */ + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + gcc_assert (flag_pic); + if (!TARGET_64BIT) + goto is_legitimate_pic; + reason = "64bit address unspec"; + goto report_error; + + case UNSPEC_GOTPCREL: + gcc_assert (flag_pic); + goto is_legitimate_pic; + + case UNSPEC_GOTTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + case UNSPEC_NTPOFF: + case UNSPEC_DTPOFF: + break; + + default: + reason = "invalid address unspec"; + goto report_error; + } + + else if (SYMBOLIC_CONST (disp) + && (flag_pic + || (TARGET_MACHO +#if TARGET_MACHO + && MACHOPIC_INDIRECT + && !machopic_operand_p (disp) +#endif + ))) + { + + is_legitimate_pic: + if (TARGET_64BIT && (index || base)) + { + /* foo@dtpoff(%rX) is ok. */ + if (GET_CODE (disp) != CONST + || GET_CODE (XEXP (disp, 0)) != PLUS + || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC + || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT + || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) + { + reason = "non-constant pic memory reference"; + goto report_error; + } + } + /* APPLE LOCAL begin dynamic-no-pic */ + else if (flag_pic && ! legitimate_pic_address_disp_p (disp)) + { + reason = "displacement is an invalid pic construct"; + goto report_error; + } +#if TARGET_MACHO + else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp)) + { + reason = "displacment must be referenced via non_lazy_pointer"; + goto report_error; + } +#endif + /* APPLE LOCAL end dynamic-no-pic */ + + /* This code used to verify that a symbolic pic displacement + includes the pic_offset_table_rtx register. + + While this is good idea, unfortunately these constructs may + be created by "adds using lea" optimization for incorrect + code like: + + int a; + int foo(int i) + { + return *(&a+i); + } + + This code is nonsensical, but results in addressing + GOT table with pic_offset_table_rtx base. We can't + just refuse it easily, since it gets matched by + "addsi3" pattern, that later gets split to lea in the + case output register differs from input. While this + can be handled by separate addsi pattern for this case + that never results in lea, this seems to be easier and + correct fix for crash to disable this test. 
*/ + } + else if (GET_CODE (disp) != LABEL_REF + && GET_CODE (disp) != CONST_INT + && (GET_CODE (disp) != CONST + || !legitimate_constant_p (disp)) + && (GET_CODE (disp) != SYMBOL_REF + || !legitimate_constant_p (disp))) + { + reason = "displacement is not constant"; + goto report_error; + } + else if (TARGET_64BIT + && !x86_64_immediate_operand (disp, VOIDmode)) + { + reason = "displacement is out of range"; + goto report_error; + } + } + + /* Everything looks valid. */ + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "Success.\n"); + return TRUE; + + report_error: + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "Error: %s\n", reason); + debug_rtx (reason_rtx); + } + return FALSE; +} + +/* Return a unique alias set for the GOT. */ + +static HOST_WIDE_INT +ix86_GOT_alias_set (void) +{ + static HOST_WIDE_INT set = -1; + if (set == -1) + set = new_alias_set (); + return set; +} + +/* Return a legitimate reference for ORIG (an address) using the + register REG. If REG is 0, a new pseudo is generated. + + There are two types of references that must be handled: + + 1. Global data references must load the address from the GOT, via + the PIC reg. An insn is emitted to do this load, and the reg is + returned. + + 2. Static data references, constant pool addresses, and code labels + compute the address as an offset from the GOT, whose base is in + the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to + differentiate them from global data objects. The returned + address is the PIC reg + an unspec constant. + + GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC + reg also appears in the address. */ + +static rtx +legitimize_pic_address (rtx orig, rtx reg) +{ + rtx addr = orig; + rtx new = orig; + rtx base; + +#if TARGET_MACHO + if (TARGET_MACHO && !TARGET_64BIT) + { + if (reg == 0) + reg = gen_reg_rtx (Pmode); + /* Use the generic Mach-O PIC machinery. */ + return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); + } +#endif + + if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) + new = addr; + else if (TARGET_64BIT + && ix86_cmodel != CM_SMALL_PIC + && local_symbolic_operand (addr, Pmode)) + { + rtx tmpreg; + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). */ + + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); + new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); + } + else + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); + new = gen_rtx_CONST (Pmode, new); + if (!reg) + tmpreg = gen_reg_rtx (Pmode); + else + tmpreg = reg; + emit_move_insn (tmpreg, new); + + if (reg != 0) + { + new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, + tmpreg, 1, OPTAB_DIRECT); + new = reg; + } + else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); + } + else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode)) + { + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). 
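+
+   Editorial summary of the two 32-bit PIC access forms built here,
+   illustrative only, with the conventional PIC register:
+
+     // local symbol:   addr = %ebx + sym@GOTOFF      (no load needed)
+     // global symbol:  addr = *(%ebx + sym@GOT)      (one GOT load)
+
+   The local form is the CONST/PLUS built just below; the global form
+   is the gen_const_mem path that follows it.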
*/ + + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); + new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); + } + else + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); + new = gen_rtx_CONST (Pmode, new); + new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); + + if (reg != 0) + { + emit_move_insn (reg, new); + new = reg; + } + } + else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) + { + if (TARGET_64BIT) + { + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); + new = gen_rtx_CONST (Pmode, new); + new = gen_const_mem (Pmode, new); + set_mem_alias_set (new, ix86_GOT_alias_set ()); + + if (reg == 0) + reg = gen_reg_rtx (Pmode); + /* Use directly gen_movsi, otherwise the address is loaded + into register for CSE. We don't want to CSE this addresses, + instead we CSE addresses from the GOT table, so skip this. */ + emit_insn (gen_movsi (reg, new)); + new = reg; + } + else + { + /* This symbol must be referenced via a load from the + Global Offset Table (@GOT). */ + + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); + new = gen_rtx_CONST (Pmode, new); + new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); + new = gen_const_mem (Pmode, new); + set_mem_alias_set (new, ix86_GOT_alias_set ()); + + if (reg == 0) + reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, new); + new = reg; + } + } + else + { + if (GET_CODE (addr) == CONST_INT + && !x86_64_immediate_operand (addr, VOIDmode)) + { + if (reg) + { + emit_move_insn (reg, addr); + new = reg; + } + else + new = force_reg (Pmode, addr); + } + else if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + + /* We must match stuff we generate before. Assume the only + unspecs that can get here are ours. Not that we could do + anything with them anyway.... */ + if (GET_CODE (addr) == UNSPEC + || (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == UNSPEC)) + return orig; + gcc_assert (GET_CODE (addr) == PLUS); + } + if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); + + /* Check first to see if this is a constant offset from a @GOTOFF + symbol reference. */ + if (local_symbolic_operand (op0, Pmode) + && GET_CODE (op1) == CONST_INT) + { + if (!TARGET_64BIT) + { + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), + UNSPEC_GOTOFF); + new = gen_rtx_PLUS (Pmode, new, op1); + new = gen_rtx_CONST (Pmode, new); + new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); + + if (reg != 0) + { + emit_move_insn (reg, new); + new = reg; + } + } + else + { + if (INTVAL (op1) < -16*1024*1024 + || INTVAL (op1) >= 16*1024*1024) + { + if (!x86_64_immediate_operand (op1, Pmode)) + op1 = force_reg (Pmode, op1); + new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); + } + } + } + else + { + base = legitimize_pic_address (XEXP (addr, 0), reg); + new = legitimize_pic_address (XEXP (addr, 1), + base == reg ? 
NULL_RTX : reg); + + if (GET_CODE (new) == CONST_INT) + new = plus_constant (base, INTVAL (new)); + else + { + if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); + new = XEXP (new, 1); + } + new = gen_rtx_PLUS (Pmode, base, new); + /* APPLE LOCAL begin fix-and-continue 6358507 */ + if (!legitimate_address_p (Pmode, new, FALSE)) + new = force_reg (Pmode, new); + /* APPLE LOCAL end fix-and-continue 6358507 */ + } + } + } + } + return new; +} + +/* Load the thread pointer. If TO_REG is true, force it into a register. */ + +static rtx +get_thread_pointer (int to_reg) +{ + rtx tp, reg, insn; + + tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); + if (!to_reg) + return tp; + + reg = gen_reg_rtx (Pmode); + insn = gen_rtx_SET (VOIDmode, reg, tp); + insn = emit_insn (insn); + + return reg; +} + +/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is + false if we expect this to be used for a memory address and true if + we expect to load the address into a register. */ + +static rtx +legitimize_tls_address (rtx x, enum tls_model model, int for_mov) +{ + rtx dest, base, off, pic, tp; + int type; + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + dest = gen_reg_rtx (Pmode); + tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; + + if (TARGET_64BIT && ! TARGET_GNU2_TLS) + { + rtx rax = gen_rtx_REG (Pmode, 0), insns; + + start_sequence (); + emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); + insns = get_insns (); + end_sequence (); + + emit_libcall_block (insns, dest, rax, x); + } + else if (TARGET_64BIT && TARGET_GNU2_TLS) + emit_insn (gen_tls_global_dynamic_64 (dest, x)); + else + emit_insn (gen_tls_global_dynamic_32 (dest, x)); + + if (TARGET_GNU2_TLS) + { + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); + + set_unique_reg_note (get_last_insn (), REG_EQUIV, x); + } + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + base = gen_reg_rtx (Pmode); + tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; + + if (TARGET_64BIT && ! TARGET_GNU2_TLS) + { + rtx rax = gen_rtx_REG (Pmode, 0), insns, note; + + start_sequence (); + emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); + insns = get_insns (); + end_sequence (); + + note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); + note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); + emit_libcall_block (insns, base, rax, note); + } + else if (TARGET_64BIT && TARGET_GNU2_TLS) + emit_insn (gen_tls_local_dynamic_base_64 (base)); + else + emit_insn (gen_tls_local_dynamic_base_32 (base)); + + if (TARGET_GNU2_TLS) + { + rtx x = ix86_tls_module_base (); + + set_unique_reg_note (get_last_insn (), REG_EQUIV, + gen_rtx_MINUS (Pmode, x, tp)); + } + + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); + off = gen_rtx_CONST (Pmode, off); + + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); + + if (TARGET_GNU2_TLS) + { + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); + + set_unique_reg_note (get_last_insn (), REG_EQUIV, x); + } + + break; + + case TLS_MODEL_INITIAL_EXEC: + if (TARGET_64BIT) + { + pic = NULL; + type = UNSPEC_GOTNTPOFF; + } + else if (flag_pic) + { + if (reload_in_progress) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + pic = pic_offset_table_rtx; + type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; + } + else if (!TARGET_ANY_GNU_TLS) + { + pic = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (pic)); + type = UNSPEC_GOTTPOFF; + } + else + { + pic = NULL; + type = UNSPEC_INDNTPOFF; + } + + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); + off = gen_rtx_CONST (Pmode, off); + if (pic) + off = gen_rtx_PLUS (Pmode, pic, off); + off = gen_const_mem (Pmode, off); + set_mem_alias_set (off, ix86_GOT_alias_set ()); + + if (TARGET_64BIT || TARGET_ANY_GNU_TLS) + { + base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + off = force_reg (Pmode, off); + return gen_rtx_PLUS (Pmode, base, off); + } + else + { + base = get_thread_pointer (true); + dest = gen_reg_rtx (Pmode); + emit_insn (gen_subsi3 (dest, base, off)); + } + break; + + case TLS_MODEL_LOCAL_EXEC: + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), + (TARGET_64BIT || TARGET_ANY_GNU_TLS) + ? UNSPEC_NTPOFF : UNSPEC_TPOFF); + off = gen_rtx_CONST (Pmode, off); + + if (TARGET_64BIT || TARGET_ANY_GNU_TLS) + { + base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + return gen_rtx_PLUS (Pmode, base, off); + } + else + { + base = get_thread_pointer (true); + dest = gen_reg_rtx (Pmode); + emit_insn (gen_subsi3 (dest, base, off)); + } + break; + + default: + gcc_unreachable (); + } + + return dest; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + MODE and WIN are passed so that this macro can use + GO_IF_LEGITIMATE_ADDRESS. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. + + For the 80386, we handle X+REG by loading X into a register R and + using R+REG. R will go in a general reg and indexing will be used. + However, if REG is a broken-out memory address or multiplication, + nothing needs to be done because REG can certainly go in a general reg. + + When -fpic is used, special handling is needed for symbolic references. + See comments by legitimize_pic_address in i386.c for details. */ + +rtx +legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) +{ + int changed = 0; + unsigned log; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", + GET_MODE_NAME (mode)); + debug_rtx (x); + } + + log = GET_CODE (x) == SYMBOL_REF ? 
SYMBOL_REF_TLS_MODEL (x) : 0; + if (log) + return legitimize_tls_address (x, log, false); + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF + && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) + { + rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false); + return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); + } + + if (flag_pic && SYMBOLIC_CONST (x)) + return legitimize_pic_address (x, 0); + /* APPLE LOCAL begin dynamic-no-pic */ +#if TARGET_MACHO + if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) + return machopic_indirect_data_reference (x, 0); +#endif + /* APPLE LOCAL end dynamic-no-pic */ + + /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ + if (GET_CODE (x) == ASHIFT + && GET_CODE (XEXP (x, 1)) == CONST_INT + && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) + { + changed = 1; + log = INTVAL (XEXP (x, 1)); + x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), + GEN_INT (1 << log)); + } + + if (GET_CODE (x) == PLUS) + { + /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ + + if (GET_CODE (XEXP (x, 0)) == ASHIFT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) + { + changed = 1; + log = INTVAL (XEXP (XEXP (x, 0), 1)); + XEXP (x, 0) = gen_rtx_MULT (Pmode, + force_reg (Pmode, XEXP (XEXP (x, 0), 0)), + GEN_INT (1 << log)); + } + + if (GET_CODE (XEXP (x, 1)) == ASHIFT + && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT + && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) + { + changed = 1; + log = INTVAL (XEXP (XEXP (x, 1), 1)); + XEXP (x, 1) = gen_rtx_MULT (Pmode, + force_reg (Pmode, XEXP (XEXP (x, 1), 0)), + GEN_INT (1 << log)); + } + + /* Put multiply first if it isn't already. */ + if (GET_CODE (XEXP (x, 1)) == MULT) + { + rtx tmp = XEXP (x, 0); + XEXP (x, 0) = XEXP (x, 1); + XEXP (x, 1) = tmp; + changed = 1; + } + + /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) + into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be + created by virtual register instantiation, register elimination, and + similar optimizations. */ + if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) + { + changed = 1; + x = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, XEXP (x, 0), + XEXP (XEXP (x, 1), 0)), + XEXP (XEXP (x, 1), 1)); + } + + /* Canonicalize + (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) + into (plus (plus (mult (reg) (const)) (reg)) (const)). 
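+
+   Editorial example of the shift canonicalization above, not from the
+   patch: only shift counts 0..3 are rewritten, because those are
+   exactly the scales an x86 address can encode:
+
+     // (ashift r 3)  ->  (mult r 8)   usable as index*8 in an address
+     // (ashift r 4)  ->  left alone; 16 is not a valid address scale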
*/ + else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS + && CONSTANT_P (XEXP (x, 1))) + { + rtx constant; + rtx other = NULL_RTX; + + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + constant = XEXP (x, 1); + other = XEXP (XEXP (XEXP (x, 0), 1), 1); + } + else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) + { + constant = XEXP (XEXP (XEXP (x, 0), 1), 1); + other = XEXP (x, 1); + } + else + constant = 0; + + if (constant) + { + changed = 1; + x = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), + XEXP (XEXP (XEXP (x, 0), 1), 0)), + plus_constant (other, INTVAL (constant))); + } + } + + if (changed && legitimate_address_p (mode, x, FALSE)) + return x; + + if (GET_CODE (XEXP (x, 0)) == MULT) + { + changed = 1; + XEXP (x, 0) = force_operand (XEXP (x, 0), 0); + } + + if (GET_CODE (XEXP (x, 1)) == MULT) + { + changed = 1; + XEXP (x, 1) = force_operand (XEXP (x, 1), 0); + } + + if (changed + && GET_CODE (XEXP (x, 1)) == REG + && GET_CODE (XEXP (x, 0)) == REG) + return x; + + if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) + { + changed = 1; + x = legitimize_pic_address (x, 0); + } + + if (changed && legitimate_address_p (mode, x, FALSE)) + return x; + + if (GET_CODE (XEXP (x, 0)) == REG) + { + rtx temp = gen_reg_rtx (Pmode); + rtx val = force_operand (XEXP (x, 1), temp); + if (val != temp) + emit_move_insn (temp, val); + + XEXP (x, 1) = temp; + return x; + } + + else if (GET_CODE (XEXP (x, 1)) == REG) + { + rtx temp = gen_reg_rtx (Pmode); + rtx val = force_operand (XEXP (x, 0), temp); + if (val != temp) + emit_move_insn (temp, val); + + XEXP (x, 0) = temp; + return x; + } + } + + return x; +} + +/* Print an integer constant expression in assembler syntax. Addition + and subtraction are the only arithmetic that may appear in these + expressions. FILE is the stdio stream to write to, X is the rtx, and + CODE is the operand print code from the output string. */ + +static void +output_pic_addr_const (FILE *file, rtx x, int code) +{ + char buf[256]; + + switch (GET_CODE (x)) + { + case PC: + gcc_assert (flag_pic); + putc ('.', file); + break; + + case SYMBOL_REF: + /* APPLE LOCAL begin axe stubs 5571540 */ + if (! TARGET_MACHO || +#if TARGET_MACHO + ! darwin_stubs || +#endif + TARGET_64BIT) + /* APPLE LOCAL end axe stubs 5571540 */ + output_addr_const (file, x); + else + { + const char *name = XSTR (x, 0); + + /* Mark the decl as referenced so that cgraph will output the function. */ + if (SYMBOL_REF_DECL (x)) + mark_decl_referenced (SYMBOL_REF_DECL (x)); + +#if TARGET_MACHO + if (MACHOPIC_INDIRECT + && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) + name = machopic_indirection_name (x, /*stub_p=*/true); +#endif + assemble_name (file, name); + } + if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) + fputs ("@PLT", file); + break; + + case LABEL_REF: + x = XEXP (x, 0); + /* FALLTHRU */ + case CODE_LABEL: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); + assemble_name (asm_out_file, buf); + break; + + case CONST_INT: + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case CONST: + /* This used to output parentheses around the expression, + but that does not work on the 386 (either ATT or BSD assembler). */ + output_pic_addr_const (file, XEXP (x, 0), code); + break; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode) + { + /* We can use %d if the number is <32 bits and positive. 
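+
+   Editorial illustration of the two branches below, not from the
+   patch: a VOIDmode CONST_DOUBLE carries its value as two
+   HOST_WIDE_INT halves, so for example
+
+     // value 0x123456789:  HIGH = 0x1, LOW = 0x23456789
+     //   hex branch prints "0x123456789" via "0x%lx%08lx"
+     // value 100:          HIGH = 0, LOW = 100
+     //   decimal branch prints "100"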
*/ + if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) + fprintf (file, "0x%lx%08lx", + (unsigned long) CONST_DOUBLE_HIGH (x), + (unsigned long) CONST_DOUBLE_LOW (x)); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); + } + else + /* We can't handle floating point constants; + PRINT_OPERAND must handle them. */ + output_operand_lossage ("floating constant misused"); + break; + + case PLUS: + /* Some assemblers need integer constants to appear first. */ + if (GET_CODE (XEXP (x, 0)) == CONST_INT) + { + output_pic_addr_const (file, XEXP (x, 0), code); + putc ('+', file); + output_pic_addr_const (file, XEXP (x, 1), code); + } + else + { + gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); + output_pic_addr_const (file, XEXP (x, 1), code); + putc ('+', file); + output_pic_addr_const (file, XEXP (x, 0), code); + } + break; + + case MINUS: + if (!TARGET_MACHO) + putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); + output_pic_addr_const (file, XEXP (x, 0), code); + putc ('-', file); + output_pic_addr_const (file, XEXP (x, 1), code); + if (!TARGET_MACHO) + putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); + break; + + case UNSPEC: + gcc_assert (XVECLEN (x, 0) == 1); + output_pic_addr_const (file, XVECEXP (x, 0, 0), code); + switch (XINT (x, 1)) + { + case UNSPEC_GOT: + fputs ("@GOT", file); + break; + case UNSPEC_GOTOFF: + fputs ("@GOTOFF", file); + break; + case UNSPEC_GOTPCREL: + fputs ("@GOTPCREL(%rip)", file); + break; + case UNSPEC_GOTTPOFF: + /* FIXME: This might be @TPOFF in Sun ld too. */ + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TPOFF: + fputs ("@TPOFF", file); + break; + case UNSPEC_NTPOFF: + if (TARGET_64BIT) + fputs ("@TPOFF", file); + else + fputs ("@NTPOFF", file); + break; + case UNSPEC_DTPOFF: + fputs ("@DTPOFF", file); + break; + case UNSPEC_GOTNTPOFF: + if (TARGET_64BIT) + fputs ("@GOTTPOFF(%rip)", file); + else + fputs ("@GOTNTPOFF", file); + break; + case UNSPEC_INDNTPOFF: + fputs ("@INDNTPOFF", file); + break; + default: + output_operand_lossage ("invalid UNSPEC as operand"); + break; + } + break; + + default: + output_operand_lossage ("invalid expression as operand"); + } +} + +/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. */ + +static void +i386_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + fputs (ASM_LONG, file); + output_addr_const (file, x); + fputs ("@DTPOFF", file); + switch (size) + { + case 4: + break; + case 8: + fputs (", 0", file); + break; + default: + gcc_unreachable (); + } +} + +/* In the name of slightly smaller debug output, and to cater to + general assembler lossage, recognize PIC+GOTOFF and turn it back + into a direct symbol reference. + + On Darwin, this is necessary to avoid a crash, because Darwin + has a different PIC label for each routine but the DWARF debugging + information is not associated with any particular routine, so it's + necessary to remove references to the PIC label from RTL stored by + the DWARF output code. */ + +static rtx +ix86_delegitimize_address (rtx orig_x) +{ + rtx x = orig_x; + /* reg_addend is NULL or a multiple of some register. */ + rtx reg_addend = NULL_RTX; + /* const_addend is NULL or a const_int. */ + rtx const_addend = NULL_RTX; + /* This is the result, or NULL. 
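+
+   Editorial examples of the delegitimization performed below, RTL
+   sketched informally, not from the patch:
+
+     // (mem (plus %ebx (const (unspec [sym] GOT))))   ->  sym
+     // (plus %ebx (const (unspec [sym] GOTOFF)))      ->  sym
+     // with an index, the addend is re-attached:
+     //   %ebx + %eax*4 + sym@GOTOFF  ->  (plus (mult %eax 4) sym)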
*/ + rtx result = NULL_RTX; + + if (GET_CODE (x) == MEM) + x = XEXP (x, 0); + + if (TARGET_64BIT) + { + if (GET_CODE (x) != CONST + || GET_CODE (XEXP (x, 0)) != UNSPEC + || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL + || GET_CODE (orig_x) != MEM) + return orig_x; + return XVECEXP (XEXP (x, 0), 0, 0); + } + + if (GET_CODE (x) != PLUS + || GET_CODE (XEXP (x, 1)) != CONST) + return orig_x; + + if (GET_CODE (XEXP (x, 0)) == REG + && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) + /* %ebx + GOT/GOTOFF */ + ; + else if (GET_CODE (XEXP (x, 0)) == PLUS) + { + /* %ebx + %reg * scale + GOT/GOTOFF */ + reg_addend = XEXP (x, 0); + if (GET_CODE (XEXP (reg_addend, 0)) == REG + && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM) + reg_addend = XEXP (reg_addend, 1); + else if (GET_CODE (XEXP (reg_addend, 1)) == REG + && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM) + reg_addend = XEXP (reg_addend, 0); + else + return orig_x; + if (GET_CODE (reg_addend) != REG + && GET_CODE (reg_addend) != MULT + && GET_CODE (reg_addend) != ASHIFT) + return orig_x; + } + else + return orig_x; + + x = XEXP (XEXP (x, 1), 0); + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == CONST_INT) + { + const_addend = XEXP (x, 1); + x = XEXP (x, 0); + } + + if (GET_CODE (x) == UNSPEC + && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) + || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) + result = XVECEXP (x, 0, 0); + + if (TARGET_MACHO && darwin_local_data_pic (x) + && GET_CODE (orig_x) != MEM) + result = XEXP (x, 0); + + if (! result) + return orig_x; + + if (const_addend) + result = gen_rtx_PLUS (Pmode, result, const_addend); + if (reg_addend) + result = gen_rtx_PLUS (Pmode, reg_addend, result); + return result; +} + +static void +put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, + int fp, FILE *file) +{ + const char *suffix; + + if (mode == CCFPmode || mode == CCFPUmode) + { + enum rtx_code second_code, bypass_code; + ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); + gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); + code = ix86_fp_compare_code_to_integer (code); + mode = CCmode; + } + if (reverse) + code = reverse_condition (code); + + switch (code) + { + case EQ: + suffix = "e"; + break; + case NE: + suffix = "ne"; + break; + case GT: + gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); + suffix = "g"; + break; + case GTU: + /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. + Those same assemblers have the same but opposite lossage on cmov. */ + gcc_assert (mode == CCmode); + suffix = fp ? "nbe" : "a"; + break; + case LT: + switch (mode) + { + case CCNOmode: + case CCGOCmode: + suffix = "s"; + break; + + case CCmode: + case CCGCmode: + suffix = "l"; + break; + + default: + gcc_unreachable (); + } + break; + case LTU: + gcc_assert (mode == CCmode); + suffix = "b"; + break; + case GE: + switch (mode) + { + case CCNOmode: + case CCGOCmode: + suffix = "ns"; + break; + + case CCmode: + case CCGCmode: + suffix = "ge"; + break; + + default: + gcc_unreachable (); + } + break; + case GEU: + /* ??? As above. */ + gcc_assert (mode == CCmode); + suffix = fp ? "nb" : "ae"; + break; + case LE: + gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); + suffix = "le"; + break; + case LEU: + gcc_assert (mode == CCmode); + suffix = "be"; + break; + case UNORDERED: + suffix = fp ? "u" : "p"; + break; + case ORDERED: + suffix = fp ? 
"nu" : "np"; + break; + default: + gcc_unreachable (); + } + fputs (suffix, file); +} + +/* Print the name of register X to FILE based on its machine mode and number. + If CODE is 'w', pretend the mode is HImode. + If CODE is 'b', pretend the mode is QImode. + If CODE is 'k', pretend the mode is SImode. + If CODE is 'q', pretend the mode is DImode. + If CODE is 'h', pretend the reg is the 'high' byte register. + If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */ + +void +print_reg (rtx x, int code, FILE *file) +{ + gcc_assert (REGNO (x) != ARG_POINTER_REGNUM + && REGNO (x) != FRAME_POINTER_REGNUM + && REGNO (x) != FLAGS_REG + && REGNO (x) != FPSR_REG); + + if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) + putc ('%', file); + + if (code == 'w' || MMX_REG_P (x)) + code = 2; + else if (code == 'b') + code = 1; + else if (code == 'k') + code = 4; + else if (code == 'q') + code = 8; + else if (code == 'y') + code = 3; + else if (code == 'h') + code = 0; + else + code = GET_MODE_SIZE (GET_MODE (x)); + + /* Irritatingly, AMD extended registers use different naming convention + from the normal registers. */ + if (REX_INT_REG_P (x)) + { + gcc_assert (TARGET_64BIT); + switch (code) + { + case 0: + error ("extended registers have no high halves"); + break; + case 1: + fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); + break; + case 2: + fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); + break; + case 4: + fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); + break; + case 8: + fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); + break; + default: + error ("unsupported operand size for extended register"); + break; + } + return; + } + switch (code) + { + case 3: + if (STACK_TOP_P (x)) + { + fputs ("st(0)", file); + break; + } + /* FALLTHRU */ + case 8: + case 4: + case 12: + if (! ANY_FP_REG_P (x)) + putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); + /* FALLTHRU */ + case 16: + case 2: + normal: + fputs (hi_reg_name[REGNO (x)], file); + break; + case 1: + if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) + goto normal; + fputs (qi_reg_name[REGNO (x)], file); + break; + case 0: + if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) + goto normal; + fputs (qi_high_reg_name[REGNO (x)], file); + break; + default: + gcc_unreachable (); + } +} + +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in some tls_local_dynamic_base + pattern. */ + +static const char * +get_some_local_dynamic_name (void) +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + gcc_unreachable (); +} + +static int +get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + +/* Meaning of CODE: + L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. + C -- print opcode suffix for set/cmov insn. + c -- like C, but print reversed condition + F,f -- likewise, but for floating-point. + O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", + otherwise nothing + R -- print the prefix for register names. + z -- print the opcode suffix for the size of the current operand. 
+ * -- print a star (in certain assembler syntax) + A -- print an absolute memory reference. + w -- print the operand as if it's a "word" (HImode) even if it isn't. + s -- print a shift double count, followed by the assemblers argument + delimiter. + b -- print the QImode name of the register for the indicated operand. + %b0 would print %al if operands[0] is reg 0. + w -- likewise, print the HImode name of the register. + k -- likewise, print the SImode name of the register. + q -- likewise, print the DImode name of the register. + h -- print the QImode name for a "high" register, either ah, bh, ch or dh. + y -- print "st(0)" instead of "st" as a register. + D -- print condition for SSE cmp instruction. + P -- if PIC, print an @PLT suffix. + X -- don't print any sort of PIC '@' suffix for a symbol. + & -- print some in-use local-dynamic symbol name. + H -- print a memory address offset by 8; used for sse high-parts + */ + +void +print_operand (FILE *file, rtx x, int code) +{ + if (code) + { + switch (code) + { + case '*': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('*', file); + return; + + case '&': + assemble_name (file, get_some_local_dynamic_name ()); + return; + + case 'A': + switch (ASSEMBLER_DIALECT) + { + case ASM_ATT: + putc ('*', file); + break; + + case ASM_INTEL: + /* Intel syntax. For absolute addresses, registers should not + be surrounded by braces. */ + if (GET_CODE (x) != REG) + { + putc ('[', file); + PRINT_OPERAND (file, x, 0); + putc (']', file); + return; + } + break; + + default: + gcc_unreachable (); + } + + PRINT_OPERAND (file, x, 0); + return; + + + case 'L': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('l', file); + return; + + case 'W': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('w', file); + return; + + case 'B': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('b', file); + return; + + case 'Q': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('l', file); + return; + + case 'S': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('s', file); + return; + + case 'T': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('t', file); + return; + + case 'z': + /* 387 opcodes don't get size suffixes if the operands are + registers. */ + if (STACK_REG_P (x)) + return; + + /* Likewise if using Intel opcodes. */ + if (ASSEMBLER_DIALECT == ASM_INTEL) + return; + + /* This is the size of op from size of operand. */ + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 2: +#ifdef HAVE_GAS_FILDS_FISTS + putc ('s', file); +#endif + return; + + case 4: + if (GET_MODE (x) == SFmode) + { + putc ('s', file); + return; + } + else + putc ('l', file); + return; + + case 12: + case 16: + putc ('t', file); + return; + + case 8: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { +#ifdef GAS_MNEMONICS + putc ('q', file); +#else + putc ('l', file); + putc ('l', file); +#endif + } + else + putc ('l', file); + return; + + default: + gcc_unreachable (); + } + + case 'b': + case 'w': + case 'k': + case 'q': + case 'h': + case 'y': + case 'X': + case 'P': + break; + + case 's': + if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) + { + PRINT_OPERAND (file, x, 0); + putc (',', file); + } + return; + + case 'D': + /* Little bit of braindamage here. The SSE compare instructions + does use completely different names for the comparisons that the + fp conditional moves. 
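+
+   Editorial example, not from the patch: %D prints the SSE comparison
+   suffix, so an output template along the lines of "cmp%D0ps"
+   (template name illustrative) expands per the switch below:
+
+     // GT  ->  "nle"  ->  cmpnleps
+     // LT  ->  "lt"   ->  cmpltps
+
+   matching the cmpps immediate encodings rather than the fcmov names.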
*/ + switch (GET_CODE (x)) + { + case EQ: + case UNEQ: + fputs ("eq", file); + break; + case LT: + case UNLT: + fputs ("lt", file); + break; + case LE: + case UNLE: + fputs ("le", file); + break; + case UNORDERED: + fputs ("unord", file); + break; + case NE: + case LTGT: + fputs ("neq", file); + break; + case UNGE: + case GE: + fputs ("nlt", file); + break; + case UNGT: + case GT: + fputs ("nle", file); + break; + case ORDERED: + fputs ("ord", file); + break; + default: + gcc_unreachable (); + } + return; + case 'O': +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX + if (ASSEMBLER_DIALECT == ASM_ATT) + { + switch (GET_MODE (x)) + { + case HImode: putc ('w', file); break; + case SImode: + case SFmode: putc ('l', file); break; + case DImode: + case DFmode: putc ('q', file); break; + default: gcc_unreachable (); + } + putc ('.', file); + } +#endif + return; + case 'C': + put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); + return; + case 'F': +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('.', file); +#endif + put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); + return; + + /* Like above, but reverse condition */ + case 'c': + /* Check to see if argument to %c is really a constant + and not a condition code which needs to be reversed. */ + if (!COMPARISON_P (x)) + { + output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); + return; + } + put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); + return; + case 'f': +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('.', file); +#endif + put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); + return; + + case 'H': + /* It doesn't actually matter what mode we use here, as we're + only going to use this for printing. */ + x = adjust_address_nv (x, DImode, 8); + break; + + case '+': + { + rtx x; + + if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) + return; + + x = find_reg_note (current_output_insn, REG_BR_PROB, 0); + if (x) + { + int pred_val = INTVAL (XEXP (x, 0)); + + if (pred_val < REG_BR_PROB_BASE * 45 / 100 + || pred_val > REG_BR_PROB_BASE * 55 / 100) + { + int taken = pred_val > REG_BR_PROB_BASE / 2; + int cputaken = final_forward_branch_p (current_output_insn) == 0; + + /* Emit hints only in the case default branch prediction + heuristics would fail. */ + if (taken != cputaken) + { + /* We use 3e (DS) prefix for taken branches and + 2e (CS) prefix for not taken branches. */ + if (taken) + fputs ("ds ; ", file); + else + fputs ("cs ; ", file); + } + } + } + return; + } + default: + output_operand_lossage ("invalid operand code '%c'", code); + } + } + + if (GET_CODE (x) == REG) + print_reg (x, code, file); + + else if (GET_CODE (x) == MEM) + { + /* No `byte ptr' prefix for call instructions. 
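+	 Intel syntax otherwise wants an explicit operand size, so e.g.
+	 an SImode memory operand is printed as "DWORD PTR [...]" below.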
*/ + if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') + { + const char * size; + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 1: size = "BYTE"; break; + case 2: size = "WORD"; break; + case 4: size = "DWORD"; break; + case 8: size = "QWORD"; break; + case 12: size = "XWORD"; break; + case 16: size = "XMMWORD"; break; + default: + gcc_unreachable (); + } + + /* Check for explicit size override (codes 'b', 'w' and 'k') */ + if (code == 'b') + size = "BYTE"; + else if (code == 'w') + size = "WORD"; + else if (code == 'k') + size = "DWORD"; + + fputs (size, file); + fputs (" PTR ", file); + } + + x = XEXP (x, 0); + /* Avoid (%rip) for call operands. */ + if (CONSTANT_ADDRESS_P (x) && code == 'P' + && GET_CODE (x) != CONST_INT) + output_addr_const (file, x); + else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) + output_operand_lossage ("invalid constraints for operand"); + else + output_address (x); + } + + else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE r; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + fprintf (file, "0x%08lx", l); + } + + /* These float cases don't actually occur as immediate operands. */ + else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) + { + char dstr[30]; + + real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); + fprintf (file, "%s", dstr); + } + + else if (GET_CODE (x) == CONST_DOUBLE + && GET_MODE (x) == XFmode) + { + char dstr[30]; + + real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); + fprintf (file, "%s", dstr); + } + + else + { + /* We have patterns that allow zero sets of memory, for instance. + In 64-bit mode, we should probably support all 8-byte vectors, + since we can in fact encode that into an immediate. */ + if (GET_CODE (x) == CONST_VECTOR) + { + gcc_assert (x == CONST0_RTX (GET_MODE (x))); + x = const0_rtx; + } + + if (code != 'P') + { + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + { + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + } + else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF) + { + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + else + fputs ("OFFSET FLAT:", file); + } + } + if (GET_CODE (x) == CONST_INT) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + /* APPLE LOCAL begin dynamic-no-pic */ + else if (flag_pic || (TARGET_MACHO && MACHOPIC_INDIRECT)) + /* APPLE LOCAL end dynamic-no-pic */ + output_pic_addr_const (file, x, code); + else + output_addr_const (file, x); + } +} + +/* Print a memory operand whose address is ADDR. */ + +void +print_operand_address (FILE *file, rtx addr) +{ + struct ix86_address parts; + rtx base, index, disp; + int scale; + int ok = ix86_decompose_address (addr, &parts); + + gcc_assert (ok); + + base = parts.base; + index = parts.index; + disp = parts.disp; + scale = parts.scale; + + switch (parts.seg) + { + case SEG_DEFAULT: + break; + case SEG_FS: + case SEG_GS: + if (USER_LABEL_PREFIX[0] == 0) + putc ('%', file); + fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); + break; + default: + gcc_unreachable (); + } + + if (!base && !index) + { + /* Displacement only requires special attention. 
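+	 e.g. a bare (const_int 64) prints as "ds:64" in Intel syntax,
+	 but as plain "64" in AT&T syntax.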
*/ + + if (GET_CODE (disp) == CONST_INT) + { + if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) + { + if (USER_LABEL_PREFIX[0] == 0) + putc ('%', file); + fputs ("ds:", file); + } + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); + } + else if (flag_pic) + output_pic_addr_const (file, disp, 0); + else + output_addr_const (file, disp); + + /* Use one byte shorter RIP relative addressing for 64bit mode. */ + if (TARGET_64BIT) + { + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == PLUS + && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) + disp = XEXP (XEXP (disp, 0), 0); + if (GET_CODE (disp) == LABEL_REF + || (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == 0)) + fputs ("(%rip)", file); + } + } + else + { + if (ASSEMBLER_DIALECT == ASM_ATT) + { + if (disp) + { + if (flag_pic) + output_pic_addr_const (file, disp, 0); + else if (GET_CODE (disp) == LABEL_REF) + output_asm_label (disp); + else + output_addr_const (file, disp); + } + + putc ('(', file); + if (base) + print_reg (base, 0, file); + if (index) + { + putc (',', file); + print_reg (index, 0, file); + if (scale != 1) + fprintf (file, ",%d", scale); + } + putc (')', file); + } + else + { + rtx offset = NULL_RTX; + + if (disp) + { + /* Pull out the offset of a symbol; print any symbol itself. */ + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == PLUS + && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) + { + offset = XEXP (XEXP (disp, 0), 1); + disp = gen_rtx_CONST (VOIDmode, + XEXP (XEXP (disp, 0), 0)); + } + + if (flag_pic) + output_pic_addr_const (file, disp, 0); + else if (GET_CODE (disp) == LABEL_REF) + output_asm_label (disp); + else if (GET_CODE (disp) == CONST_INT) + offset = disp; + else + output_addr_const (file, disp); + } + + putc ('[', file); + if (base) + { + print_reg (base, 0, file); + if (offset) + { + if (INTVAL (offset) >= 0) + putc ('+', file); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); + } + } + else if (offset) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); + else + putc ('0', file); + + if (index) + { + putc ('+', file); + print_reg (index, 0, file); + if (scale != 1) + fprintf (file, "*%d", scale); + } + putc (']', file); + } + } +} + +bool +output_addr_const_extra (FILE *file, rtx x) +{ + rtx op; + + if (GET_CODE (x) != UNSPEC) + return false; + + op = XVECEXP (x, 0, 0); + switch (XINT (x, 1)) + { + case UNSPEC_GOTTPOFF: + output_addr_const (file, op); + /* FIXME: This might be @TPOFF in Sun ld. */ + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TPOFF: + output_addr_const (file, op); + fputs ("@TPOFF", file); + break; + case UNSPEC_NTPOFF: + output_addr_const (file, op); + if (TARGET_64BIT) + fputs ("@TPOFF", file); + else + fputs ("@NTPOFF", file); + break; + case UNSPEC_DTPOFF: + output_addr_const (file, op); + fputs ("@DTPOFF", file); + break; + case UNSPEC_GOTNTPOFF: + output_addr_const (file, op); + if (TARGET_64BIT) + fputs ("@GOTTPOFF(%rip)", file); + else + fputs ("@GOTNTPOFF", file); + break; + case UNSPEC_INDNTPOFF: + output_addr_const (file, op); + fputs ("@INDNTPOFF", file); + break; + + default: + return false; + } + + return true; +} + +/* Split one or more DImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". 
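+   For example, a DImode MEM splits into SImode MEMs at byte offsets
+   0 and 4, matching the little-endian layout of the two halves.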
*/
+
+void
+split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+  while (num--)
+    {
+      rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses,
+         but we still have to handle them.  */
+      if (GET_CODE (op) == MEM)
+	{
+	  lo_half[num] = adjust_address (op, SImode, 0);
+	  hi_half[num] = adjust_address (op, SImode, 4);
+	}
+      else
+	{
+	  lo_half[num] = simplify_gen_subreg (SImode, op,
+					      GET_MODE (op) == VOIDmode
+					      ? DImode : GET_MODE (op), 0);
+	  hi_half[num] = simplify_gen_subreg (SImode, op,
+					      GET_MODE (op) == VOIDmode
+					      ? DImode : GET_MODE (op), 4);
+	}
+    }
+}
+/* Split one or more TImode RTL references into pairs of DImode
+   references.  The RTL can be REG, offsettable MEM, integer constant, or
+   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
+   split and "num" is its length.  lo_half and hi_half are output arrays
+   that parallel "operands".  */
+
+void
+split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+  while (num--)
+    {
+      rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses, but we
+         still have to handle them.  */
+      if (GET_CODE (op) == MEM)
+	{
+	  lo_half[num] = adjust_address (op, DImode, 0);
+	  hi_half[num] = adjust_address (op, DImode, 8);
+	}
+      else
+	{
+	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
+	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
+	}
+    }
+}
+
+/* Output code to perform a 387 binary operation in INSN, one of PLUS,
+   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
+   is the expression of the binary operation.  The output may either be
+   emitted here, or returned to the caller, like all output_* functions.
+
+   There is no guarantee that the operands are the same mode, as they
+   might be within FLOAT or FLOAT_EXTEND expressions.  */
+
+#ifndef SYSV386_COMPAT
+/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
+   wants to fix the assemblers because that causes incompatibility
+   with gcc.  No-one wants to fix gcc because that causes
+   incompatibility with assemblers...  You can use the option of
+   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
+#define SYSV386_COMPAT 1
+#endif
+
+const char *
+output_387_binary_op (rtx insn, rtx *operands)
+{
+  static char buf[30];
+  const char *p;
+  const char *ssep;
+  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
+
+#ifdef ENABLE_CHECKING
+  /* Even if we do not want to check the inputs, this documents the input
+     constraints, which helps in understanding the following code.
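+     In short: for a 387 operation the destination must be a stack
+     register matching one of the two sources, at least one of which
+     must be st(0); anything else has to be an SSE operation.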
*/ + if (STACK_REG_P (operands[0]) + && ((REG_P (operands[1]) + && REGNO (operands[0]) == REGNO (operands[1]) + && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) + || (REG_P (operands[2]) + && REGNO (operands[0]) == REGNO (operands[2]) + && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) + && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) + ; /* ok */ + else + gcc_assert (is_sse); +#endif + + switch (GET_CODE (operands[3])) + { + case PLUS: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fiadd"; + else + p = "fadd"; + ssep = "add"; + break; + + case MINUS: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fisub"; + else + p = "fsub"; + ssep = "sub"; + break; + + case MULT: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fimul"; + else + p = "fmul"; + ssep = "mul"; + break; + + case DIV: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fidiv"; + else + p = "fdiv"; + ssep = "div"; + break; + + default: + gcc_unreachable (); + } + + if (is_sse) + { + strcpy (buf, ssep); + if (GET_MODE (operands[0]) == SFmode) + strcat (buf, "ss\t{%2, %0|%0, %2}"); + else + strcat (buf, "sd\t{%2, %0|%0, %2}"); + return buf; + } + strcpy (buf, p); + + switch (GET_CODE (operands[3])) + { + case MULT: + case PLUS: + if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) + { + rtx temp = operands[2]; + operands[2] = operands[1]; + operands[1] = temp; + } + + /* know operands[0] == operands[1]. */ + + if (GET_CODE (operands[2]) == MEM) + { + p = "%z2\t%2"; + break; + } + + if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) + { + if (STACK_TOP_P (operands[0])) + /* How is it that we are storing to a dead operand[2]? + Well, presumably operands[1] is dead too. We can't + store the result to st(0) as st(0) gets popped on this + instruction. Instead store to operands[2] (which I + think has to be st(1)). st(1) will be popped later. + gcc <= 2.8.1 didn't have this check and generated + assembly code that the Unixware assembler rejected. */ + p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ + else + p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ + break; + } + + if (STACK_TOP_P (operands[0])) + p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ + else + p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ + break; + + case MINUS: + case DIV: + if (GET_CODE (operands[1]) == MEM) + { + p = "r%z1\t%1"; + break; + } + + if (GET_CODE (operands[2]) == MEM) + { + p = "%z2\t%2"; + break; + } + + if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) + { +#if SYSV386_COMPAT + /* The SystemV/386 SVR3.2 assembler, and probably all AT&T + derived assemblers, confusingly reverse the direction of + the operation for fsub{r} and fdiv{r} when the + destination register is not st(0). The Intel assembler + doesn't have this brain damage. Read !SYSV386_COMPAT to + figure out what the hardware really does. */ + if (STACK_TOP_P (operands[0])) + p = "{p\t%0, %2|rp\t%2, %0}"; + else + p = "{rp\t%2, %0|p\t%0, %2}"; +#else + if (STACK_TOP_P (operands[0])) + /* As above for fmul/fadd, we can't store to st(0). 
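+		 Store to operands[2] instead; the trailing pop
+		 discards st(0).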
*/ + p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ + else + p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ +#endif + break; + } + + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + { +#if SYSV386_COMPAT + if (STACK_TOP_P (operands[0])) + p = "{rp\t%0, %1|p\t%1, %0}"; + else + p = "{p\t%1, %0|rp\t%0, %1}"; +#else + if (STACK_TOP_P (operands[0])) + p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ + else + p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ +#endif + break; + } + + if (STACK_TOP_P (operands[0])) + { + if (STACK_TOP_P (operands[1])) + p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ + else + p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ + break; + } + else if (STACK_TOP_P (operands[1])) + { +#if SYSV386_COMPAT + p = "{\t%1, %0|r\t%0, %1}"; +#else + p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ +#endif + } + else + { +#if SYSV386_COMPAT + p = "{r\t%2, %0|\t%0, %2}"; +#else + p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ +#endif + } + break; + + default: + gcc_unreachable (); + } + + strcat (buf, p); + return buf; +} + +/* Return needed mode for entity in optimize_mode_switching pass. */ + +int +ix86_mode_needed (int entity, rtx insn) +{ + enum attr_i387_cw mode; + + /* The mode UNINITIALIZED is used to store control word after a + function call or ASM pattern. The mode ANY specify that function + has no requirements on the control word and make no changes in the + bits we are interested in. */ + + if (CALL_P (insn) + || (NONJUMP_INSN_P (insn) + && (asm_noperands (PATTERN (insn)) >= 0 + || GET_CODE (PATTERN (insn)) == ASM_INPUT))) + return I387_CW_UNINITIALIZED; + + if (recog_memoized (insn) < 0) + return I387_CW_ANY; + + mode = get_attr_i387_cw (insn); + + switch (entity) + { + case I387_TRUNC: + if (mode == I387_CW_TRUNC) + return mode; + break; + + case I387_FLOOR: + if (mode == I387_CW_FLOOR) + return mode; + break; + + case I387_CEIL: + if (mode == I387_CW_CEIL) + return mode; + break; + + case I387_MASK_PM: + if (mode == I387_CW_MASK_PM) + return mode; + break; + + default: + gcc_unreachable (); + } + + return I387_CW_ANY; +} + +/* Output code to initialize control word copies used by trunc?f?i and + rounding patterns. CURRENT_MODE is set to current control word, + while NEW_MODE is set to new control word. 
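+   Bits 10-11 of the 387 control word select the rounding mode
+   (00 = nearest, 01 = down, 10 = up, 11 = toward zero), hence the
+   0x0400, 0x0800 and 0x0c00 values below; bit 5 (0x0020) masks the
+   precision exception.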
*/ + +void +emit_i387_cw_initialization (int mode) +{ + rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); + rtx new_mode; + + int slot; + + rtx reg = gen_reg_rtx (HImode); + + emit_insn (gen_x86_fnstcw_1 (stored_mode)); + emit_move_insn (reg, stored_mode); + + if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size) + { + switch (mode) + { + case I387_CW_TRUNC: + /* round toward zero (truncate) */ + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); + slot = SLOT_CW_TRUNC; + break; + + case I387_CW_FLOOR: + /* round down toward -oo */ + emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); + slot = SLOT_CW_FLOOR; + break; + + case I387_CW_CEIL: + /* round up toward +oo */ + emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); + slot = SLOT_CW_CEIL; + break; + + case I387_CW_MASK_PM: + /* mask precision exception for nearbyint() */ + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); + slot = SLOT_CW_MASK_PM; + break; + + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case I387_CW_TRUNC: + /* round toward zero (truncate) */ + emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); + slot = SLOT_CW_TRUNC; + break; + + case I387_CW_FLOOR: + /* round down toward -oo */ + emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); + slot = SLOT_CW_FLOOR; + break; + + case I387_CW_CEIL: + /* round up toward +oo */ + emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); + slot = SLOT_CW_CEIL; + break; + + case I387_CW_MASK_PM: + /* mask precision exception for nearbyint() */ + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); + slot = SLOT_CW_MASK_PM; + break; + + default: + gcc_unreachable (); + } + } + + gcc_assert (slot < MAX_386_STACK_LOCALS); + + new_mode = assign_386_stack_local (HImode, slot); + emit_move_insn (new_mode, reg); +} + +/* Output code for INSN to convert a float to a signed int. OPERANDS + are the insn operands. The output may be [HSD]Imode and the input + operand may be [SDX]Fmode. */ + +const char * +output_fix_trunc (rtx insn, rtx *operands, int fisttp) +{ + int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + int dimode_p = GET_MODE (operands[0]) == DImode; + int round_mode = get_attr_i387_cw (insn); + + /* Jump through a hoop or two for DImode, since the hardware has no + non-popping instruction. We used to do this a different way, but + that was somewhat fragile and broke with post-reload splitters. */ + if ((dimode_p || fisttp) && !stack_top_dies) + output_asm_insn ("fld\t%y1", operands); + + gcc_assert (STACK_TOP_P (operands[1])); + gcc_assert (GET_CODE (operands[0]) == MEM); + + if (fisttp) + output_asm_insn ("fisttp%z0\t%0", operands); + else + { + if (round_mode != I387_CW_ANY) + output_asm_insn ("fldcw\t%3", operands); + if (stack_top_dies || dimode_p) + output_asm_insn ("fistp%z0\t%0", operands); + else + output_asm_insn ("fist%z0\t%0", operands); + if (round_mode != I387_CW_ANY) + output_asm_insn ("fldcw\t%2", operands); + } + + return ""; +} + +/* Output code for x87 ffreep insn. The OPNO argument, which may only + have the values zero or one, indicates the ffreep insn's operand + from the OPERANDS array. */ + +static const char * +output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) +{ + if (TARGET_USE_FFREEP) +#if HAVE_AS_IX86_FFREEP + return opno ? 
"ffreep\t%y1" : "ffreep\t%y0"; +#else + switch (REGNO (operands[opno])) + { + case FIRST_STACK_REG + 0: return ".word\t0xc0df"; + case FIRST_STACK_REG + 1: return ".word\t0xc1df"; + case FIRST_STACK_REG + 2: return ".word\t0xc2df"; + case FIRST_STACK_REG + 3: return ".word\t0xc3df"; + case FIRST_STACK_REG + 4: return ".word\t0xc4df"; + case FIRST_STACK_REG + 5: return ".word\t0xc5df"; + case FIRST_STACK_REG + 6: return ".word\t0xc6df"; + case FIRST_STACK_REG + 7: return ".word\t0xc7df"; + } +#endif + + return opno ? "fstp\t%y1" : "fstp\t%y0"; +} + + +/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi + should be used. UNORDERED_P is true when fucom should be used. */ + +const char * +output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) +{ + int stack_top_dies; + rtx cmp_op0, cmp_op1; + int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); + + if (eflags_p) + { + cmp_op0 = operands[0]; + cmp_op1 = operands[1]; + } + else + { + cmp_op0 = operands[1]; + cmp_op1 = operands[2]; + } + + if (is_sse) + { + if (GET_MODE (operands[0]) == SFmode) + if (unordered_p) + return "ucomiss\t{%1, %0|%0, %1}"; + else + return "comiss\t{%1, %0|%0, %1}"; + else + if (unordered_p) + return "ucomisd\t{%1, %0|%0, %1}"; + else + return "comisd\t{%1, %0|%0, %1}"; + } + + gcc_assert (STACK_TOP_P (cmp_op0)); + + stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + + if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) + { + if (stack_top_dies) + { + output_asm_insn ("ftst\n\tfnstsw\t%0", operands); + return output_387_ffreep (operands, 1); + } + else + return "ftst\n\tfnstsw\t%0"; + } + + if (STACK_REG_P (cmp_op1) + && stack_top_dies + && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) + && REGNO (cmp_op1) != FIRST_STACK_REG) + { + /* If both the top of the 387 stack dies, and the other operand + is also a stack register that dies, then this must be a + `fcompp' float compare */ + + if (eflags_p) + { + /* There is no double popping fcomi variant. Fortunately, + eflags is immune from the fstp's cc clobbering. */ + if (unordered_p) + output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); + else + output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); + return output_387_ffreep (operands, 0); + } + else + { + if (unordered_p) + return "fucompp\n\tfnstsw\t%0"; + else + return "fcompp\n\tfnstsw\t%0"; + } + } + else + { + /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. 
*/ + + static const char * const alt[16] = + { + "fcom%z2\t%y2\n\tfnstsw\t%0", + "fcomp%z2\t%y2\n\tfnstsw\t%0", + "fucom%z2\t%y2\n\tfnstsw\t%0", + "fucomp%z2\t%y2\n\tfnstsw\t%0", + + "ficom%z2\t%y2\n\tfnstsw\t%0", + "ficomp%z2\t%y2\n\tfnstsw\t%0", + NULL, + NULL, + + "fcomi\t{%y1, %0|%0, %y1}", + "fcomip\t{%y1, %0|%0, %y1}", + "fucomi\t{%y1, %0|%0, %y1}", + "fucomip\t{%y1, %0|%0, %y1}", + + NULL, + NULL, + NULL, + NULL + }; + + int mask; + const char *ret; + + mask = eflags_p << 3; + mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; + mask |= unordered_p << 1; + mask |= stack_top_dies; + + gcc_assert (mask < 16); + ret = alt[mask]; + gcc_assert (ret); + + return ret; + } +} + +void +ix86_output_addr_vec_elt (FILE *file, int value) +{ + const char *directive = ASM_LONG; + +#ifdef ASM_QUAD + if (TARGET_64BIT) + directive = ASM_QUAD; +#else + gcc_assert (!TARGET_64BIT); +#endif + + fprintf (file, "%s%s%d\n", directive, LPREFIX, value); +} + +void +ix86_output_addr_diff_elt (FILE *file, int value, int rel) +{ + if (TARGET_64BIT) + fprintf (file, "%s%s%d-%s%d\n", + ASM_LONG, LPREFIX, value, LPREFIX, rel); + else if (HAVE_AS_GOTOFF_IN_DATA) + fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); +#if TARGET_MACHO + else if (TARGET_MACHO) + { + fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); + machopic_output_function_base_name (file); + fprintf(file, "\n"); + } +#endif + else + asm_fprintf (file, "%s%U%s+[.-%s%d]\n", + ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); +} + +/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate + for the target. */ + +void +ix86_expand_clear (rtx dest) +{ + rtx tmp; + + /* We play register width games, which are only valid after reload. */ + gcc_assert (reload_completed); + + /* Avoid HImode and its attendant prefix byte. */ + if (GET_MODE_SIZE (GET_MODE (dest)) < 4) + dest = gen_rtx_REG (SImode, REGNO (dest)); + + tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); + + /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ + if (reload_completed && (!TARGET_USE_MOV0 || optimize_size)) + { + rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17)); + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); + } + + emit_insn (tmp); +} + +/* X is an unchanging MEM. If it is a constant pool reference, return + the constant pool rtx, else NULL. 
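+   e.g. a load of a pooled floating-point literal appears here as a
+   MEM of a SYMBOL_REF into the pool, and the pooled CONST_DOUBLE is
+   what gets returned.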
*/ + +rtx +maybe_get_pool_constant (rtx x) +{ + x = ix86_delegitimize_address (XEXP (x, 0)); + + if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + return get_pool_constant (x); + + return NULL_RTX; +} + +void +ix86_expand_move (enum machine_mode mode, rtx operands[]) +{ + int strict = (reload_in_progress || reload_completed); + /* APPLE LOCAL dynamic-no-pic */ + rtx insn, op0, op1; + enum tls_model model; + + op0 = operands[0]; + op1 = operands[1]; + + if (GET_CODE (op1) == SYMBOL_REF) + { + model = SYMBOL_REF_TLS_MODEL (op1); + if (model) + { + op1 = legitimize_tls_address (op1, model, true); + op1 = force_operand (op1, op0); + if (op1 == op0) + return; + } + } + else if (GET_CODE (op1) == CONST + && GET_CODE (XEXP (op1, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) + { + model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0)); + if (model) + { + rtx addend = XEXP (XEXP (op1, 0), 1); + op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true); + op1 = force_operand (op1, NULL); + op1 = expand_simple_binop (Pmode, PLUS, op1, addend, + op0, 1, OPTAB_DIRECT); + if (op1 == op0) + return; + } + } + + /* APPLE LOCAL begin dynamic-no-pic */ + /* allow macho & macho for x86_64 to coexist */ + if (((TARGET_MACHO && MACHOPIC_INDIRECT) + || flag_pic) + && mode == Pmode && symbolic_operand (op1, Pmode)) + /* APPLE LOCAL end dynamic-no-pic */ + { + if (TARGET_MACHO && !TARGET_64BIT) + { +#if TARGET_MACHO + /* APPLE LOCAL begin dynamic-no-pic */ + if (MACHOPIC_INDIRECT) + { + rtx temp = ((reload_in_progress + || ((op0 && GET_CODE (op0) == REG) + && mode == Pmode)) + ? op0 : gen_reg_rtx (Pmode)); + op1 = machopic_indirect_data_reference (op1, temp); + if (MACHOPIC_PURE) + op1 = machopic_legitimize_pic_address (op1, mode, + temp == op1 ? 0 : temp); + } + if (op0 != op1 && GET_CODE (op0) != MEM) + { + insn = gen_rtx_SET (VOIDmode, op0, op1); + emit_insn (insn); + return; + } + if (GET_CODE (op0) == MEM) + op1 = force_reg (Pmode, op1); + else + { + rtx temp = op0; + if (GET_CODE (temp) != REG) + temp = gen_reg_rtx (Pmode); + temp = legitimize_pic_address (op1, temp); + if (temp == op0) + return; + op1 = temp; + } + /* APPLE LOCAL end dynamic-no-pic */ +#endif + } + else + { + if (GET_CODE (op0) == MEM) + op1 = force_reg (Pmode, op1); + else + op1 = legitimize_address (op1, op1, Pmode); + } + } + else + { + if (GET_CODE (op0) == MEM + && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) + || !push_operand (op0, mode)) + && GET_CODE (op1) == MEM) + op1 = force_reg (mode, op1); + + if (push_operand (op0, mode) + && ! general_no_elim_operand (op1, mode)) + op1 = copy_to_mode_reg (mode, op1); + + /* Force large constants in 64bit compilation into register + to get them CSEed. */ + if (TARGET_64BIT && mode == DImode + && immediate_operand (op1, mode) + && !x86_64_zext_immediate_operand (op1, VOIDmode) + && !register_operand (op0, mode) + && optimize && !reload_completed && !reload_in_progress) + op1 = copy_to_mode_reg (mode, op1); + + if (FLOAT_MODE_P (mode)) + { + /* If we are loading a floating point constant to a register, + force the value to memory now, since we'll get better code + out the back end. 
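+	     (Neither the x87 nor SSE can load a floating-point
+	     immediate directly, so the constant has to live in
+	     memory anyway.)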
*/ + + if (strict) + ; + else if (GET_CODE (op1) == CONST_DOUBLE) + { + op1 = validize_mem (force_const_mem (mode, op1)); + if (!register_operand (op0, mode)) + { + rtx temp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); + emit_move_insn (op0, temp); + return; + } + } + } + } + + emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); +} + +void +ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) +{ + rtx op0 = operands[0], op1 = operands[1]; + /* APPLE LOCAL begin radar 4614623 */ + cfun->uses_vector = 1; + /* APPLE LOCAL end radar 4614623 */ + + /* Force constants other than zero into memory. We do not know how + the instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (op0, mode) + && CONSTANT_P (op1) + && standard_sse_constant_p (op1) <= 0) + op1 = validize_mem (force_const_mem (mode, op1)); + + /* Make operand1 a register if it isn't already. */ + if (!no_new_pseudos + && !register_operand (op0, mode) + && !register_operand (op1, mode)) + { + emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); + return; + } + + emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); +} + +/* Implement the movmisalign patterns for SSE. Non-SSE modes go + straight to ix86_expand_vector_move. */ + +void +ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) +{ + rtx op0, op1, m; + + op0 = operands[0]; + op1 = operands[1]; + + if (MEM_P (op1)) + { + /* If we're optimizing for size, movups is the smallest. */ + if (optimize_size) + { + op0 = gen_lowpart (V4SFmode, op0); + op1 = gen_lowpart (V4SFmode, op1); + emit_insn (gen_sse_movups (op0, op1)); + return; + } + + /* ??? If we have typed data, then it would appear that using + movdqu is the only way to get unaligned data loaded with + integer type. */ + if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + op0 = gen_lowpart (V16QImode, op0); + op1 = gen_lowpart (V16QImode, op1); + emit_insn (gen_sse2_movdqu (op0, op1)); + return; + } + + if (TARGET_SSE2 && mode == V2DFmode) + { + rtx zero; + + /* When SSE registers are split into halves, we can avoid + writing to the top half twice. */ + if (TARGET_SSE_SPLIT_REGS) + { + emit_insn (gen_rtx_CLOBBER (VOIDmode, op0)); + zero = op0; + } + else + { + /* ??? Not sure about the best option for the Intel chips. + The following would seem to satisfy; the register is + entirely cleared, breaking the dependency chain. We + then store to the upper half, with a dependency depth + of one. A rumor has it that Intel recommends two movsd + followed by an unpacklpd, but this is unconfirmed. And + given that the dependency depth of the unpacklpd would + still be one, I'm not sure why this would be better. 
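+		 (xorps/pxor of a register with itself is the usual
+		 dependency-breaking idiom.)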
*/ + zero = CONST0_RTX (V2DFmode); + } + + m = adjust_address (op1, DFmode, 0); + emit_insn (gen_sse2_loadlpd (op0, zero, m)); + m = adjust_address (op1, DFmode, 8); + emit_insn (gen_sse2_loadhpd (op0, op0, m)); + } + else + { + if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) + emit_move_insn (op0, CONST0_RTX (mode)); + else + emit_insn (gen_rtx_CLOBBER (VOIDmode, op0)); + + if (mode != V4SFmode) + op0 = gen_lowpart (V4SFmode, op0); + m = adjust_address (op1, V2SFmode, 0); + emit_insn (gen_sse_loadlps (op0, op0, m)); + m = adjust_address (op1, V2SFmode, 8); + emit_insn (gen_sse_loadhps (op0, op0, m)); + } + } + else if (MEM_P (op0)) + { + /* If we're optimizing for size, movups is the smallest. */ + if (optimize_size) + { + op0 = gen_lowpart (V4SFmode, op0); + op1 = gen_lowpart (V4SFmode, op1); + emit_insn (gen_sse_movups (op0, op1)); + return; + } + + /* ??? Similar to above, only less clear because of quote + typeless stores unquote. */ + if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES + && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + op0 = gen_lowpart (V16QImode, op0); + op1 = gen_lowpart (V16QImode, op1); + emit_insn (gen_sse2_movdqu (op0, op1)); + return; + } + + if (TARGET_SSE2 && mode == V2DFmode) + { + m = adjust_address (op0, DFmode, 0); + emit_insn (gen_sse2_storelpd (m, op1)); + m = adjust_address (op0, DFmode, 8); + emit_insn (gen_sse2_storehpd (m, op1)); + } + else + { + if (mode != V4SFmode) + op1 = gen_lowpart (V4SFmode, op1); + m = adjust_address (op0, V2SFmode, 0); + emit_insn (gen_sse_storelps (m, op1)); + m = adjust_address (op0, V2SFmode, 8); + emit_insn (gen_sse_storehps (m, op1)); + } + } + else + gcc_unreachable (); +} + +/* Expand a push in MODE. This is some mode for which we do not support + proper push instructions, at least from the registers that we expect + the value to live in. */ + +void +ix86_expand_push (enum machine_mode mode, rtx x) +{ + rtx tmp; + + tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, + GEN_INT (-GET_MODE_SIZE (mode)), + stack_pointer_rtx, 1, OPTAB_DIRECT); + if (tmp != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, tmp); + + tmp = gen_rtx_MEM (mode, stack_pointer_rtx); + emit_move_insn (tmp, x); +} + +/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the + destination to use for the operation. If different from the true + destination in operands[0], a copy operation will be required. */ + +rtx +ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, + rtx operands[]) +{ + int matching_memory; + rtx src1, src2, dst; + + dst = operands[0]; + src1 = operands[1]; + src2 = operands[2]; + + /* Recognize <var1> = <value> <op> <var1> for commutative operators */ + if (GET_RTX_CLASS (code) == RTX_COMM_ARITH + && (rtx_equal_p (dst, src2) + || immediate_operand (src1, mode))) + { + rtx temp = src1; + src1 = src2; + src2 = temp; + } + + /* If the destination is memory, and we do not have matching source + operands, do things in registers. */ + matching_memory = 0; + if (GET_CODE (dst) == MEM) + { + if (rtx_equal_p (dst, src1)) + matching_memory = 1; + else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH + && rtx_equal_p (dst, src2)) + matching_memory = 2; + else + dst = gen_reg_rtx (mode); + } + + /* Both source operands cannot be in memory. */ + if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM) + { + if (matching_memory != 2) + src2 = force_reg (mode, src2); + else + src1 = force_reg (mode, src1); + } + + /* If the operation is not commutable, source 1 cannot be a constant + or non-matching memory. 
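+     e.g. (minus (const_int 1) (reg)) must load the constant into a
+     register first, since the x86 sub instruction has no reversed
+     form.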
*/
+  if ((CONSTANT_P (src1)
+       || (!matching_memory && GET_CODE (src1) == MEM))
+      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+    src1 = force_reg (mode, src1);
+
+  src1 = operands[1] = src1;
+  src2 = operands[2] = src2;
+  return dst;
+}
+
+/* Similarly, but assume that the destination has already been
+   set up properly.  */
+
+void
+ix86_fixup_binary_operands_no_copy (enum rtx_code code,
+				    enum machine_mode mode, rtx operands[])
+{
+  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+  gcc_assert (dst == operands[0]);
+}
+
+/* Attempt to expand a binary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which will allow 3 separate
+   memory references (one output, two input) in a single insn.  */
+
+void
+ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
+			     rtx operands[])
+{
+  rtx src1, src2, dst, op, clob;
+
+  dst = ix86_fixup_binary_operands (code, mode, operands);
+  src1 = operands[1];
+  src2 = operands[2];
+
+  /* Emit the instruction.  */
+
+  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
+  if (reload_in_progress)
+    {
+      /* Reload doesn't know about the flags register, and doesn't know that
+         it doesn't want to clobber it.  We can only do this with PLUS.  */
+      gcc_assert (code == PLUS);
+      emit_insn (op);
+    }
+  else
+    {
+      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+    }
+
+  /* Fix up the destination if needed.  */
+  if (dst != operands[0])
+    emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the binary operator meets the
+   appropriate constraints.  */
+
+int
+ix86_binary_operator_ok (enum rtx_code code,
+			 enum machine_mode mode ATTRIBUTE_UNUSED,
+			 rtx operands[3])
+{
+  /* Both source operands cannot be in memory.  */
+  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
+    return 0;
+  /* If the operation is not commutative, source 1 cannot be a constant.  */
+  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+    return 0;
+  /* If the destination is memory, we must have a matching source operand.  */
+  if (GET_CODE (operands[0]) == MEM
+      && ! (rtx_equal_p (operands[0], operands[1])
+	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+		&& rtx_equal_p (operands[0], operands[2]))))
+    return 0;
+  /* If the operation is not commutative and source 1 is memory, we must
+     have a matching destination.  */
+  if (GET_CODE (operands[1]) == MEM
+      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
+      && ! rtx_equal_p (operands[0], operands[1]))
+    return 0;
+  return 1;
+}
+
+/* Attempt to expand a unary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which will allow 2 separate
+   memory references (one output, one input) in a single insn.  */
+
+void
+ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
+			    rtx operands[])
+{
+  int matching_memory;
+  rtx src, dst, op, clob;
+
+  dst = operands[0];
+  src = operands[1];
+
+  /* If the destination is memory, and we do not have matching source
+     operands, do things in registers.  */
+  matching_memory = 0;
+  if (MEM_P (dst))
+    {
+      if (rtx_equal_p (dst, src))
+	matching_memory = 1;
+      else
+	dst = gen_reg_rtx (mode);
+    }
+
+  /* When source operand is memory, destination must match.  */
+  if (MEM_P (src) && !matching_memory)
+    src = force_reg (mode, src);
+
+  /* Emit the instruction.
*/ + + op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); + if (reload_in_progress || code == NOT) + { + /* Reload doesn't know about the flags register, and doesn't know that + it doesn't want to clobber it. */ + gcc_assert (code == NOT); + emit_insn (op); + } + else + { + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); + } + + /* Fix up the destination if needed. */ + if (dst != operands[0]) + emit_move_insn (operands[0], dst); +} + +/* Return TRUE or FALSE depending on whether the unary operator meets the + appropriate constraints. */ + +int +ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + rtx operands[2] ATTRIBUTE_UNUSED) +{ + /* If one of operands is memory, source and destination must match. */ + if ((GET_CODE (operands[0]) == MEM + || GET_CODE (operands[1]) == MEM) + && ! rtx_equal_p (operands[0], operands[1])) + return FALSE; + return TRUE; +} + +/* APPLE LOCAL begin 4176531 4424891 */ +static void +ix86_expand_vector_move2 (enum machine_mode mode, rtx op0, rtx op1) +{ + rtx operands[2]; + operands[0] = op0; + operands[1] = op1; + ix86_expand_vector_move (mode, operands); +} + +static rtvec +gen_2_4_rtvec (int scalars_per_vector, rtx val, enum machine_mode mode) +{ + rtvec rval; + switch (scalars_per_vector) + { + case 2: rval = gen_rtvec (2, val, CONST0_RTX (mode)); + break; + case 4: rval = gen_rtvec (4, val, CONST0_RTX (mode), + CONST0_RTX (mode), CONST0_RTX (mode)); + break; + default: abort (); + } + return rval; +} + +/* Convert a DFmode value in an SSE register into an unsigned SImode. + When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64 + conversion, and ignoring the upper 32 bits of the result. On + x86_64, there is an equivalent SSE %xmm->signed-int-64 conversion. + On x86_32, we don't have the instruction, nor the 64-bit + destination register it requires. Do the conversion inline in the + SSE registers. Requires SSE2. For x86_32, -mfpmath=sse, + !optimize_size only. */ +const char * +ix86_expand_convert_uns_DF2SI_sse (rtx operands[]) +{ + rtx int_zero_as_fp, int_maxval_as_fp, int_two31_as_fp; + REAL_VALUE_TYPE rvt_zero, rvt_int_maxval, rvt_int_two31; + rtx int_zero_as_xmm, int_maxval_as_xmm; + rtx fp_value = operands[1]; + rtx target = operands[0]; + rtx large_xmm; + rtx large_xmm_v2di; + rtx le_op; + rtx zero_or_two31_xmm; + rtx final_result_rtx; + rtx v_rtx; + rtx incoming_value; + + cfun->uses_vector = 1; + + real_from_integer (&rvt_zero, DFmode, 0ULL, 0ULL, 1); + int_zero_as_fp = const_double_from_real_value (rvt_zero, DFmode); + + real_from_integer (&rvt_int_maxval, DFmode, 0xffffffffULL, 0ULL, 1); + int_maxval_as_fp = const_double_from_real_value (rvt_int_maxval, DFmode); + + real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1); + int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode); + + incoming_value = force_reg (GET_MODE (operands[1]), operands[1]); + + gcc_assert (ix86_preferred_stack_boundary >= 128); + + fp_value = gen_reg_rtx (V2DFmode); + ix86_expand_vector_move2 (V2DFmode, fp_value, + gen_rtx_SUBREG (V2DFmode, incoming_value, 0)); + large_xmm = gen_reg_rtx (V2DFmode); + + v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, + gen_2_4_rtvec (2, int_two31_as_fp, DFmode)); + ix86_expand_vector_move2 (DFmode, large_xmm, v_rtx); + le_op = gen_rtx_fmt_ee (LE, V2DFmode, + gen_rtx_SUBREG (V2DFmode, fp_value, 0), large_xmm); + /* large_xmm = (fp_value >= 2**31) ? 
-1 : 0 ; */ + emit_insn (gen_sse2_vmmaskcmpv2df3 (large_xmm, large_xmm, fp_value, le_op)); + + int_maxval_as_xmm = gen_reg_rtx (V2DFmode); + v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, + gen_2_4_rtvec (2, int_maxval_as_fp, DFmode)); + ix86_expand_vector_move2 (DFmode, int_maxval_as_xmm, v_rtx); + + emit_insn (gen_sse2_vmsminv2df3 (fp_value, fp_value, int_maxval_as_xmm)); + + int_zero_as_xmm = gen_reg_rtx (V2DFmode); + v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, + gen_2_4_rtvec (2, int_zero_as_fp, DFmode)); + + ix86_expand_vector_move2 (DFmode, int_zero_as_xmm, v_rtx); + + emit_insn (gen_sse2_vmsmaxv2df3 (fp_value, fp_value, int_zero_as_xmm)); + + zero_or_two31_xmm = gen_reg_rtx (V2DFmode); + v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, + gen_2_4_rtvec (2, int_two31_as_fp, DFmode)); + ix86_expand_vector_move2 (DFmode, zero_or_two31_xmm, v_rtx); + + /* zero_or_two31 = (large_xmm) ? 2**31 : 0; */ + emit_insn (gen_andv2df3 (zero_or_two31_xmm, zero_or_two31_xmm, large_xmm)); + /* if (large_xmm) fp_value -= 2**31; */ + emit_insn (gen_subv2df3 (fp_value, fp_value, zero_or_two31_xmm)); + /* assert (0 <= fp_value && fp_value < 2**31); + int_result = trunc (fp_value); */ + final_result_rtx = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_cvttpd2dq (final_result_rtx, fp_value)); + + large_xmm_v2di = gen_reg_rtx (V2DImode); + emit_move_insn (large_xmm_v2di, gen_rtx_SUBREG (V2DImode, large_xmm, 0)); + emit_insn (gen_ashlv2di3 (large_xmm_v2di, large_xmm_v2di, + gen_rtx_CONST_INT (SImode, 31))); + + emit_insn (gen_xorv4si3 (final_result_rtx, final_result_rtx, + gen_rtx_SUBREG (V4SImode, large_xmm_v2di, 0))); + if (!rtx_equal_p (target, final_result_rtx)) + emit_insn (gen_sse2_stored (target, final_result_rtx)); + return ""; +} + +/* Convert a SFmode value in an SSE register into an unsigned DImode. + When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64 + conversion, and subsequently ignoring the upper 32 bits of the + result. On x86_64, there is an equivalent SSE %xmm->signed-int-64 + conversion. On x86_32, we don't have the instruction, nor the + 64-bit destination register it requires. Do the conversion inline + in the SSE registers. Requires SSE2. For x86_32, -mfpmath=sse, + !optimize_size only. */ +const char * +ix86_expand_convert_uns_SF2SI_sse (rtx operands[]) +{ + rtx int_zero_as_fp, int_two31_as_fp, int_two32_as_fp; + REAL_VALUE_TYPE rvt_zero, rvt_int_two31, rvt_int_two32; + rtx int_zero_as_xmm; + rtx fp_value = operands[1]; + rtx target = operands[0]; + rtx large_xmm; + rtx two31_xmm, two32_xmm; + rtx above_two31_xmm, above_two32_xmm; + rtx zero_or_two31_SI_xmm; + rtx le_op; + rtx zero_or_two31_SF_xmm; + rtx int_result_xmm; + rtx v_rtx; + rtx incoming_value; + + cfun->uses_vector = 1; + + real_from_integer (&rvt_zero, SFmode, 0ULL, 0ULL, 1); + int_zero_as_fp = const_double_from_real_value (rvt_zero, SFmode); + + real_from_integer (&rvt_int_two31, SFmode, 0x80000000ULL, 0ULL, 1); + int_two31_as_fp = const_double_from_real_value (rvt_int_two31, SFmode); + + real_from_integer (&rvt_int_two32, SFmode, (HOST_WIDE_INT)0x100000000ULL, + 0ULL, 1); + int_two32_as_fp = const_double_from_real_value (rvt_int_two32, SFmode); + + incoming_value = force_reg (GET_MODE (operands[1]), operands[1]); + + gcc_assert (ix86_preferred_stack_boundary >= 128); + + fp_value = gen_reg_rtx (V4SFmode); + ix86_expand_vector_move2 (V4SFmode, fp_value, + gen_rtx_SUBREG (V4SFmode, incoming_value, 0)); + large_xmm = gen_reg_rtx (V4SFmode); + + /* fp_value = MAX (fp_value, 0.0); */ + /* Preclude negative values; truncate at zero. 
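+     (The conversion is undefined for negative inputs, so clamping
+     them to +0.0 is as good a choice as any.)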
*/
+  int_zero_as_xmm = gen_reg_rtx (V4SFmode);
+  v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+				gen_2_4_rtvec (4, int_zero_as_fp, SFmode));
+  ix86_expand_vector_move2 (SFmode, int_zero_as_xmm, v_rtx);
+  emit_insn (gen_sse_vmsmaxv4sf3 (fp_value, fp_value, int_zero_as_xmm));
+
+  /* two31_xmm = 0x80000000; */
+  two31_xmm = gen_reg_rtx (V4SFmode);
+  v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+				gen_2_4_rtvec (4, int_two31_as_fp, SFmode));
+  ix86_expand_vector_move2 (SFmode, two31_xmm, v_rtx);
+
+  /* zero_or_two31_SF_xmm = 0x80000000; */
+  zero_or_two31_SF_xmm = gen_reg_rtx (V4SFmode);
+  ix86_expand_vector_move2 (SFmode, zero_or_two31_SF_xmm, two31_xmm);
+
+  /* above_two31_xmm = (fp_value >= 2**31) ? 0xffff_ffff : 0 ; */
+  above_two31_xmm = gen_reg_rtx (V4SFmode);
+  ix86_expand_vector_move2 (SFmode, above_two31_xmm, two31_xmm);
+  le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two31_xmm,
+			  gen_rtx_SUBREG (V4SFmode, two31_xmm, 0));
+  emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two31_xmm, above_two31_xmm,
+				     fp_value, le_op));
+
+  /* two32_xmm = 0x1_0000_0000; */
+  two32_xmm = gen_reg_rtx (V4SFmode);
+  v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+				gen_2_4_rtvec (4, int_two32_as_fp, SFmode));
+  ix86_expand_vector_move2 (SFmode, two32_xmm, v_rtx);
+
+  /* above_two32_xmm = (fp_value >= 2**32) ? 0xffff_ffff : 0 ; */
+  above_two32_xmm = gen_reg_rtx (V4SFmode);
+  ix86_expand_vector_move2 (SFmode, above_two32_xmm, two32_xmm);
+  le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two32_xmm,
+			  gen_rtx_SUBREG (V4SFmode, two32_xmm, 0));
+  emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two32_xmm, above_two32_xmm,
+				     fp_value, le_op));
+
+  /* zero_or_two31_SF_xmm = (above_two31_xmm) ? 2**31 : 0; */
+  emit_insn (gen_andv4sf3 (zero_or_two31_SF_xmm, zero_or_two31_SF_xmm,
+			   above_two31_xmm));
+
+  /* zero_or_two31_SI_xmm = (above_two31_xmm << 31); shifting the
+     all-ones mask recreates the 0x80000000 bias as an integer.  */
+  zero_or_two31_SI_xmm = gen_reg_rtx (V4SImode);
+  emit_move_insn (zero_or_two31_SI_xmm,
+		  gen_rtx_SUBREG (V4SImode, above_two31_xmm, 0));
+  emit_insn (gen_ashlv4si3 (zero_or_two31_SI_xmm, zero_or_two31_SI_xmm,
+			    gen_rtx_CONST_INT (SImode, 31)));
+
+  /* if (above_two31_xmm) fp_value -= 2**31; */
+  /* If the input FP value is greater than 2**31, subtract that amount
+     from the FP value before conversion.  We'll re-add that amount as
+     an integer after the conversion.  */
+  emit_insn (gen_subv4sf3 (fp_value, fp_value, zero_or_two31_SF_xmm));
+
+  /* assert (0.0 <= fp_value && fp_value < 2**31);
+     int_result_xmm = trunc (fp_value); */
+  /* Apply the SSE single -> signed_int32 conversion to our biased,
+     clamped SF value.  */
+  int_result_xmm = gen_reg_rtx (V4SImode);
+  emit_insn (gen_sse2_cvttps2dq (int_result_xmm, fp_value));
+
+  /* int_result_xmm += zero_or_two31_SI_xmm; */
+  /* Restore the 2**31 bias we may have subtracted earlier.  If the
+     input FP value was between 2**31 and 2**32, this will unbias the
+     result.
+
+     input_fp_value < 2**31:  this won't change the value
+     2**31 <= input_fp_value < 2**32:
+       this will restore the 2**31 bias we subtracted earlier
+     input_fp_value >= 2**32:  this insn doesn't matter;
+       the next insn will clobber this result
+  */
+  emit_insn (gen_addv4si3 (int_result_xmm, int_result_xmm,
+			   zero_or_two31_SI_xmm));
+
+  /* int_result_xmm |= above_two32_xmm; */
+  /* If the input value was greater than 2**32, force the integral
+     result to 0xffff_ffff.  */
+  emit_insn (gen_iorv4si3 (int_result_xmm, int_result_xmm,
+			   gen_rtx_SUBREG (V4SImode, above_two32_xmm, 0)));
+
+  if (!rtx_equal_p (target, int_result_xmm))
+    emit_insn (gen_sse2_stored (target, int_result_xmm));
+  return "";
+}
+
+/* Convert an unsigned DImode value into a DFmode, using only SSE.
+   Expects the 64-bit DImode to be supplied as two 32-bit parts in two
+   SSE %xmm registers; result returned in an %xmm register.  Requires
+   SSE2; will use SSE3 if available.  For x86_32, -mfpmath=sse,
+   !optimize_size only.  */
+const char *
+ix86_expand_convert_uns_DI2DF_sse (rtx operands[])
+{
+  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
+  rtx bias_lo_rtx, bias_hi_rtx;
+  rtx target = operands[0];
+  rtx fp_value = operands[1];
+  rtx fp_value_hi, fp_value_lo;
+  rtx fp_value_hi_xmm, fp_value_lo_xmm;
+  rtx int_xmm;
+  rtx final_result_xmm, result_lo_xmm;
+  rtx biases, exponents;
+  rtvec biases_rtvec, exponents_rtvec;
+
+  cfun->uses_vector = 1;
+
+  gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+  int_xmm = gen_reg_rtx (V4SImode);
+
+  fp_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  fp_value_lo = gen_rtx_SUBREG (SImode, fp_value, 0);
+  fp_value_lo_xmm = gen_reg_rtx (V4SImode);
+  emit_insn (gen_sse2_loadld (fp_value_lo_xmm, CONST0_RTX (V4SImode),
+			      fp_value_lo));
+
+  fp_value_hi = gen_rtx_SUBREG (SImode, fp_value, 4);
+  fp_value_hi_xmm = gen_reg_rtx (V4SImode);
+  emit_insn (gen_sse2_loadld (fp_value_hi_xmm, CONST0_RTX (V4SImode),
+			      fp_value_hi));
+
+  ix86_expand_vector_move2 (V4SImode, int_xmm, fp_value_hi_xmm);
+  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, fp_value_lo_xmm));
+
+  exponents_rtvec = gen_rtvec (4, GEN_INT (0x45300000UL),
+			       GEN_INT (0x43300000UL),
+			       CONST0_RTX (SImode), CONST0_RTX (SImode));
+  exponents = validize_mem (
+    force_const_mem (V4SImode, gen_rtx_CONST_VECTOR (V4SImode,
+						     exponents_rtvec)));
+  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
+
+  final_result_xmm = gen_reg_rtx (V2DFmode);
+  ix86_expand_vector_move2 (V2DFmode, final_result_xmm,
+			    gen_rtx_SUBREG (V2DFmode, int_xmm, 0));
+
+  /* Integral versions of the DFmode 'exponents' above.
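+     With the 0x45300000 and 0x43300000 exponent words spliced in
+     above, lane 0 of the vector holds the double 2**84 + hi * 2**32
+     and lane 1 holds 2**52 + lo; subtracting the biases 2**84 and
+     2**52 leaves the two halves, which the final add (or haddpd)
+     combines into the full unsigned 64-bit value.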
*/ + REAL_VALUE_FROM_INT (bias_lo_rvt, 0x00000000000000ULL, 0x100000ULL, DFmode); + REAL_VALUE_FROM_INT (bias_hi_rvt, 0x10000000000000ULL, 0x000000ULL, DFmode); + bias_lo_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_lo_rvt, DFmode); + bias_hi_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_hi_rvt, DFmode); + biases_rtvec = gen_rtvec (2, bias_lo_rtx, bias_hi_rtx); + biases = validize_mem (force_const_mem (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + biases_rtvec))); + emit_insn (gen_subv2df3 (final_result_xmm, final_result_xmm, biases)); + + if (TARGET_SSE3) + { + emit_insn (gen_sse3_haddv2df3 (final_result_xmm, final_result_xmm, + final_result_xmm)); + } + else + { + result_lo_xmm = gen_reg_rtx (V2DFmode); + ix86_expand_vector_move2 (V2DFmode, result_lo_xmm, final_result_xmm); + emit_insn (gen_sse2_unpckhpd (final_result_xmm, final_result_xmm, + final_result_xmm)); + emit_insn (gen_addv2df3 (final_result_xmm, final_result_xmm, + result_lo_xmm)); + } + + if (!rtx_equal_p (target, final_result_xmm)) + emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0)); + + return ""; +} +/* APPLE LOCAL end 4176531 4424891 */ + +/* APPLE LOCAL begin 4424891 */ +/* Convert an unsigned SImode value into a DFmode, using only SSE. + Result returned in an %xmm register. For x86_32, -mfpmath=sse, + !optimize_size only. */ +const char * +ix86_expand_convert_uns_SI2DF_sse (rtx operands[]) +{ + REAL_VALUE_TYPE rvt_int_two31; + rtx int_value_reg; + rtx fp_value_xmm, fp_value_as_int_xmm; + rtx final_result_xmm; + rtx int_two31_as_fp, int_two31_as_fp_vec; + rtx v_rtx; + rtx target = operands[0]; + + gcc_assert (ix86_preferred_stack_boundary >= 128); + gcc_assert (GET_MODE (operands[1]) == SImode); + + cfun->uses_vector = 1; + + int_value_reg = gen_reg_rtx (SImode); + emit_move_insn (int_value_reg, operands[1]); + emit_insn (gen_addsi3 (int_value_reg, int_value_reg, + GEN_INT (-2147483648LL /* MIN_INT */))); + + fp_value_as_int_xmm = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_loadld (fp_value_as_int_xmm, CONST0_RTX (V4SImode), + int_value_reg)); + + fp_value_xmm = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse2_cvtdq2pd (fp_value_xmm, + gen_rtx_SUBREG (V4SImode, + fp_value_as_int_xmm, 0))); + + real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1); + int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode); + v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, + gen_2_4_rtvec (2, int_two31_as_fp, DFmode)); + + int_two31_as_fp_vec = validize_mem (force_const_mem (V2DFmode, v_rtx)); + + final_result_xmm = gen_reg_rtx (V2DFmode); + emit_move_insn (final_result_xmm, fp_value_xmm); + emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm, + int_two31_as_fp_vec)); + + if (!rtx_equal_p (target, final_result_xmm)) + emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0)); + + return ""; +} + +/* Convert a signed DImode value into a DFmode, using only SSE. + Result returned in an %xmm register. For x86_32, -mfpmath=sse, + !optimize_size only. 
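+   Computed as (double) (signed) hi32 * 2**32
+   + (double) (unsigned) lo32, reusing the unsigned SImode conversion
+   above for the low half.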
*/ +const char * +ix86_expand_convert_sign_DI2DF_sse (rtx operands[]) +{ + rtx my_operands[2]; + REAL_VALUE_TYPE rvt_int_two32; + rtx rvt_int_two32_vec; + rtx fp_value_hi_xmm, fp_value_hi_shifted_xmm; + rtx final_result_xmm; + rtx int_two32_as_fp, int_two32_as_fp_vec; + rtx target = operands[0]; + rtx input = force_reg (DImode, operands[1]); + + gcc_assert (ix86_preferred_stack_boundary >= 128); + gcc_assert (GET_MODE (input) == DImode); + + cfun->uses_vector = 1; + + fp_value_hi_xmm = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse2_cvtsi2sd (fp_value_hi_xmm, fp_value_hi_xmm, + gen_rtx_SUBREG (SImode, input, 4))); + + real_from_integer (&rvt_int_two32, DFmode, 0x100000000ULL, 0ULL, 1); + int_two32_as_fp = const_double_from_real_value (rvt_int_two32, DFmode); + rvt_int_two32_vec = gen_rtx_CONST_VECTOR (V2DFmode, + gen_2_4_rtvec (2, int_two32_as_fp, DFmode)); + + int_two32_as_fp_vec = validize_mem (force_const_mem (V2DFmode, + rvt_int_two32_vec)); + + fp_value_hi_shifted_xmm = gen_reg_rtx (V2DFmode); + emit_move_insn (fp_value_hi_shifted_xmm, fp_value_hi_xmm); + emit_insn (gen_sse2_vmmulv2df3 (fp_value_hi_shifted_xmm, + fp_value_hi_shifted_xmm, + int_two32_as_fp_vec)); + + my_operands[0] = gen_reg_rtx (DFmode); + my_operands[1] = gen_rtx_SUBREG (SImode, input, 0); + (void) ix86_expand_convert_uns_SI2DF_sse (my_operands); + + final_result_xmm = REG_P (target) && GET_MODE (target) == V2DFmode + ? target : gen_reg_rtx (V2DFmode); + emit_move_insn (final_result_xmm, gen_rtx_SUBREG (V2DFmode, + my_operands[0], 0)); + emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm, + fp_value_hi_shifted_xmm)); + + if (!rtx_equal_p (target, final_result_xmm)) + emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0)); + + return ""; +} +/* APPLE LOCAL end 4424891 */ + +/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders. + Create a mask for the sign bit in MODE for an SSE register. If VECT is + true, then replicate the mask for all elements of the vector register. + If INVERT is true, then create a mask excluding the sign bit. */ + +rtx +ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) +{ + enum machine_mode vec_mode; + HOST_WIDE_INT hi, lo; + int shift = 63; + rtvec v; + rtx mask; + + /* Find the sign bit, sign extended to 2*HWI. */ + if (mode == SFmode) + lo = 0x80000000, hi = lo < 0; + else if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << shift, hi = -1; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); + + if (invert) + lo = ~lo, hi = ~hi; + + /* Force this value into the low part of a fp vector constant. */ + mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode); + mask = gen_lowpart (mode, mask); + + if (mode == SFmode) + { + if (vect) + v = gen_rtvec (4, mask, mask, mask, mask); + else + v = gen_rtvec (4, mask, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), CONST0_RTX (SFmode)); + vec_mode = V4SFmode; + } + else + { + if (vect) + v = gen_rtvec (2, mask, mask); + else + v = gen_rtvec (2, mask, CONST0_RTX (DFmode)); + vec_mode = V2DFmode; + } + + return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v)); +} + +/* Generate code for floating point ABS or NEG. 
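+   With SSE this is a bitwise XOR (for NEG) or AND (for ABS) against
+   the sign-bit mask built by ix86_build_signbit_mask; the x87 path
+   simply emits the fchs/fabs pattern.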
*/ + +void +ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, + rtx operands[]) +{ + rtx mask, set, use, clob, dst, src; + bool matching_memory; + bool use_sse = false; + bool vector_mode = VECTOR_MODE_P (mode); + enum machine_mode elt_mode = mode; + + if (vector_mode) + { + elt_mode = GET_MODE_INNER (mode); + use_sse = true; + } + else if (TARGET_SSE_MATH) + use_sse = SSE_FLOAT_MODE_P (mode); + + /* NEG and ABS performed with SSE use bitwise mask operations. + Create the appropriate mask now. */ + if (use_sse) + mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS); + else + mask = NULL_RTX; + + dst = operands[0]; + src = operands[1]; + + /* If the destination is memory, and we don't have matching source + operands or we're using the x87, do things in registers. */ + matching_memory = false; + if (MEM_P (dst)) + { + if (use_sse && rtx_equal_p (dst, src)) + matching_memory = true; + else + dst = gen_reg_rtx (mode); + } + if (MEM_P (src) && !matching_memory) + src = force_reg (mode, src); + + if (vector_mode) + { + set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask); + set = gen_rtx_SET (VOIDmode, dst, set); + emit_insn (set); + } + else + { + set = gen_rtx_fmt_e (code, mode, src); + set = gen_rtx_SET (VOIDmode, dst, set); + if (mask) + { + use = gen_rtx_USE (VOIDmode, mask); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (3, set, use, clob))); + } + else + emit_insn (set); + } + + if (dst != operands[0]) + emit_move_insn (operands[0], dst); +} + +/* Expand a copysign operation. Special case operand 0 being a constant. */ + +void +ix86_expand_copysign (rtx operands[]) +{ + enum machine_mode mode, vmode; + rtx dest, op0, op1, mask, nmask; + + dest = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + + mode = GET_MODE (dest); + vmode = mode == SFmode ? V4SFmode : V2DFmode; + + if (GET_CODE (op0) == CONST_DOUBLE) + { + rtvec v; + + if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) + op0 = simplify_unary_operation (ABS, mode, op0, mode); + + if (op0 == CONST0_RTX (mode)) + op0 = CONST0_RTX (vmode); + else + { + if (mode == SFmode) + v = gen_rtvec (4, op0, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), CONST0_RTX (SFmode)); + else + v = gen_rtvec (2, op0, CONST0_RTX (DFmode)); + op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v)); + } + + mask = ix86_build_signbit_mask (mode, 0, 0); + + if (mode == SFmode) + emit_insn (gen_copysignsf3_const (dest, op0, op1, mask)); + else + emit_insn (gen_copysigndf3_const (dest, op0, op1, mask)); + } + else + { + nmask = ix86_build_signbit_mask (mode, 0, 1); + mask = ix86_build_signbit_mask (mode, 0, 0); + + if (mode == SFmode) + emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask)); + else + emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask)); + } +} + +/* Deconstruct a copysign operation into bit masks. Operand 0 is known to + be a constant, and so has already been expanded into a vector constant. 
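+ +   (Illustrative note added in review, not part of the original patch; +   SIGN_BIT is shorthand for the mask operand.)  In scalar terms the +   split computes + +     copysign (x, y) = (x & ~SIGN_BIT) | (y & SIGN_BIT) + +   and, X being constant, the (x & ~SIGN_BIT) half has already been +   folded, so at most one AND and one IOR are emitted below.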
*/ + +void +ix86_split_copysign_const (rtx operands[]) +{ + enum machine_mode mode, vmode; + rtx dest, op0, op1, mask, x; + + dest = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + mask = operands[3]; + + mode = GET_MODE (dest); + vmode = GET_MODE (mask); + + dest = simplify_gen_subreg (vmode, dest, mode, 0); + x = gen_rtx_AND (vmode, dest, mask); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + + if (op0 != CONST0_RTX (vmode)) + { + x = gen_rtx_IOR (vmode, dest, op0); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } +} + +/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, + so we have to do two masks. */ + +void +ix86_split_copysign_var (rtx operands[]) +{ + enum machine_mode mode, vmode; + rtx dest, scratch, op0, op1, mask, nmask, x; + + dest = operands[0]; + scratch = operands[1]; + op0 = operands[2]; + op1 = operands[3]; + nmask = operands[4]; + mask = operands[5]; + + mode = GET_MODE (dest); + vmode = GET_MODE (mask); + + if (rtx_equal_p (op0, op1)) + { + /* Shouldn't happen often (it's useless, obviously), but when it does + we'd generate incorrect code if we continue below. */ + emit_move_insn (dest, op0); + return; + } + + if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ + { + gcc_assert (REGNO (op1) == REGNO (scratch)); + + x = gen_rtx_AND (vmode, scratch, mask); + emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); + + dest = mask; + op0 = simplify_gen_subreg (vmode, op0, mode, 0); + x = gen_rtx_NOT (vmode, dest); + x = gen_rtx_AND (vmode, x, op0); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } + else + { + if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ + { + x = gen_rtx_AND (vmode, scratch, mask); + } + else /* alternative 2,4 */ + { + gcc_assert (REGNO (mask) == REGNO (scratch)); + op1 = simplify_gen_subreg (vmode, op1, mode, 0); + x = gen_rtx_AND (vmode, scratch, op1); + } + emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); + + if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ + { + dest = simplify_gen_subreg (vmode, op0, mode, 0); + x = gen_rtx_AND (vmode, dest, nmask); + } + else /* alternative 3,4 */ + { + gcc_assert (REGNO (nmask) == REGNO (dest)); + dest = nmask; + op0 = simplify_gen_subreg (vmode, op0, mode, 0); + x = gen_rtx_AND (vmode, dest, op0); + } + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } + + x = gen_rtx_IOR (vmode, dest, scratch); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); +} + +/* Return TRUE or FALSE depending on whether the first SET in INSN + has source and destination with matching CC modes, and that the + CC mode is at least as constrained as REQ_MODE. */ + +int +ix86_match_ccmode (rtx insn, enum machine_mode req_mode) +{ + rtx set; + enum machine_mode set_mode; + + set = PATTERN (insn); + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + gcc_assert (GET_CODE (set) == SET); + gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); + + set_mode = GET_MODE (SET_DEST (set)); + switch (set_mode) + { + case CCNOmode: + if (req_mode != CCNOmode + && (req_mode != CCmode + || XEXP (SET_SRC (set), 1) != const0_rtx)) + return 0; + break; + case CCmode: + if (req_mode == CCGCmode) + return 0; + /* FALLTHRU */ + case CCGCmode: + if (req_mode == CCGOCmode || req_mode == CCNOmode) + return 0; + /* FALLTHRU */ + case CCGOCmode: + if (req_mode == CCZmode) + return 0; + /* FALLTHRU */ + case CCZmode: + break; + + default: + gcc_unreachable (); + } + + return (GET_MODE (SET_SRC (set)) == set_mode); +} + +/* Generate insn patterns to do an integer compare of OPERANDS. 
*/ + +static rtx +ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) +{ + enum machine_mode cmpmode; + rtx tmp, flags; + + cmpmode = SELECT_CC_MODE (code, op0, op1); + flags = gen_rtx_REG (cmpmode, FLAGS_REG); + + /* This is very simple, but making the interface the same as in the + FP case makes the rest of the code easier. */ + tmp = gen_rtx_COMPARE (cmpmode, op0, op1); + emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); + + /* Return the test that should be put into the flags user, i.e. + the bcc, scc, or cmov instruction. */ + return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); +} + +/* Figure out whether to use ordered or unordered fp comparisons. + Return the appropriate mode to use. */ + +enum machine_mode +ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) +{ + /* ??? In order to make all comparisons reversible, we do all comparisons + non-trapping when compiling for IEEE. Once gcc is able to distinguish + all forms trapping and nontrapping comparisons, we can make inequality + comparisons trapping again, since it results in better code when using + FCOM based compares. */ + return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; +} + +enum machine_mode +ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) +{ + if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) + return ix86_fp_compare_mode (code); + switch (code) + { + /* Only zero flag is needed. */ + case EQ: /* ZF=0 */ + case NE: /* ZF!=0 */ + return CCZmode; + /* Codes needing carry flag. */ + case GEU: /* CF=0 */ + case GTU: /* CF=0 & ZF=0 */ + case LTU: /* CF=1 */ + case LEU: /* CF=1 | ZF=1 */ + return CCmode; + /* Codes possibly doable only with sign flag when + comparing against zero. */ + case GE: /* SF=OF or SF=0 */ + case LT: /* SF<>OF or SF=1 */ + if (op1 == const0_rtx) + return CCGOCmode; + else + /* For other cases Carry flag is not required. */ + return CCGCmode; + /* Codes doable only with sign flag when comparing + against zero, but we miss jump instruction for it + so we need to use relational tests against overflow + that thus needs to be zero. */ + case GT: /* ZF=0 & SF=OF */ + case LE: /* ZF=1 | SF<>OF */ + if (op1 == const0_rtx) + return CCNOmode; + else + return CCGCmode; + /* strcmp pattern do (use flags) and combine may ask us for proper + mode. */ + case USE: + return CCmode; + default: + gcc_unreachable (); + } +} + +/* Return the fixed registers used for condition codes. */ + +static bool +ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +{ + *p1 = FLAGS_REG; + *p2 = FPSR_REG; + return true; +} + +/* If two condition code modes are compatible, return a condition code + mode which is compatible with both. Otherwise, return + VOIDmode. */ + +static enum machine_mode +ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) +{ + if (m1 == m2) + return m1; + + if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) + return VOIDmode; + + if ((m1 == CCGCmode && m2 == CCGOCmode) + || (m1 == CCGOCmode && m2 == CCGCmode)) + return CCGCmode; + + switch (m1) + { + default: + gcc_unreachable (); + + case CCmode: + case CCGCmode: + case CCGOCmode: + case CCNOmode: + case CCZmode: + switch (m2) + { + default: + return VOIDmode; + + case CCmode: + case CCGCmode: + case CCGOCmode: + case CCNOmode: + case CCZmode: + return CCmode; + } + + case CCFPmode: + case CCFPUmode: + /* These are only compatible with themselves, which we already + checked above. */ + return VOIDmode; + } +} + +/* Return true if we should use an FCOMI instruction for this fp comparison. 
*/ + +int +ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED) +{ + enum rtx_code swapped_code = swap_condition (code); + return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code)) + || (ix86_fp_comparison_cost (swapped_code) + == ix86_fp_comparison_fcomi_cost (swapped_code))); +} + +/* Swap, force into registers, or otherwise massage the two operands + to a fp comparison. The operands are updated in place; the new + comparison code is returned. */ + +static enum rtx_code +ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) +{ + enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); + rtx op0 = *pop0, op1 = *pop1; + enum machine_mode op_mode = GET_MODE (op0); + int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); + + /* All of the unordered compare instructions only work on registers. + The same is true of the fcomi compare instructions. The XFmode + compare instructions require registers except when comparing + against zero or when converting operand 1 from fixed point to + floating point. */ + + if (!is_sse + && (fpcmp_mode == CCFPUmode + || (op_mode == XFmode + && ! (standard_80387_constant_p (op0) == 1 + || standard_80387_constant_p (op1) == 1) + && GET_CODE (op1) != FLOAT) + || ix86_use_fcomi_compare (code))) + { + op0 = force_reg (op_mode, op0); + op1 = force_reg (op_mode, op1); + } + else + { + /* %%% We only allow op1 in memory; op0 must be st(0). So swap + things around if they appear profitable, otherwise force op0 + into a register. */ + + if (standard_80387_constant_p (op0) == 0 + || (GET_CODE (op0) == MEM + && ! (standard_80387_constant_p (op1) == 0 + || GET_CODE (op1) == MEM))) + { + rtx tmp; + tmp = op0, op0 = op1, op1 = tmp; + code = swap_condition (code); + } + + if (GET_CODE (op0) != REG) + op0 = force_reg (op_mode, op0); + + if (CONSTANT_P (op1)) + { + int tmp = standard_80387_constant_p (op1); + if (tmp == 0) + op1 = validize_mem (force_const_mem (op_mode, op1)); + else if (tmp == 1) + { + if (TARGET_CMOVE) + op1 = force_reg (op_mode, op1); + } + else + op1 = force_reg (op_mode, op1); + } + } + + /* Try to rearrange the comparison to make it cheaper. */ + if (ix86_fp_comparison_cost (code) + > ix86_fp_comparison_cost (swap_condition (code)) + && (GET_CODE (op1) == REG || !no_new_pseudos)) + { + rtx tmp; + tmp = op0, op0 = op1, op1 = tmp; + code = swap_condition (code); + if (GET_CODE (op0) != REG) + op0 = force_reg (op_mode, op0); + } + + *pop0 = op0; + *pop1 = op1; + return code; +} + +/* Convert comparison codes we use to represent FP comparison to integer + code that will result in proper branch. Return UNKNOWN if no such code + is available. */ + +enum rtx_code +ix86_fp_compare_code_to_integer (enum rtx_code code) +{ + switch (code) + { + case GT: + return GTU; + case GE: + return GEU; + case ORDERED: + case UNORDERED: + return code; + break; + case UNEQ: + return EQ; + break; + case UNLT: + return LTU; + break; + case UNLE: + return LEU; + break; + case LTGT: + return NE; + break; + default: + return UNKNOWN; + } +} + +/* Split comparison code CODE into comparisons we can do using branch + instructions. BYPASS_CODE is comparison code for branch that will + branch around FIRST_CODE and SECOND_CODE. If some of branches + is not required, set value to UNKNOWN. + We never require more than two branches. 
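+ +   (Illustrative note added in review, not part of the original patch.) +   For example, under IEEE math a == b becomes FIRST_CODE = UNEQ with +   BYPASS_CODE = UNORDERED: the bypass branch jumps around the UNEQ +   test when the operands are unordered, since UNEQ alone would report +   NaN operands as equal.  In C terms, roughly: + +     if (__builtin_isunordered (a, b))      bypass branch +       goto not_equal; +     if (!__builtin_islessgreater (a, b))   UNEQ, exact on ordered input +       goto equal; + +   a != b instead uses FIRST_CODE = LTGT plus SECOND_CODE = UNORDERED, +   since both conditions must reach the same target.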
*/ + +void +ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, + enum rtx_code *first_code, + enum rtx_code *second_code) +{ + *first_code = code; + *bypass_code = UNKNOWN; + *second_code = UNKNOWN; + + /* The fcomi comparison sets flags as follows: + + cmp ZF PF CF + > 0 0 0 + < 0 0 1 + = 1 0 0 + un 1 1 1 */ + + switch (code) + { + case GT: /* GTU - CF=0 & ZF=0 */ + case GE: /* GEU - CF=0 */ + case ORDERED: /* PF=0 */ + case UNORDERED: /* PF=1 */ + case UNEQ: /* EQ - ZF=1 */ + case UNLT: /* LTU - CF=1 */ + case UNLE: /* LEU - CF=1 | ZF=1 */ + case LTGT: /* EQ - ZF=0 */ + break; + case LT: /* LTU - CF=1 - fails on unordered */ + *first_code = UNLT; + *bypass_code = UNORDERED; + break; + case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ + *first_code = UNLE; + *bypass_code = UNORDERED; + break; + case EQ: /* EQ - ZF=1 - fails on unordered */ + *first_code = UNEQ; + *bypass_code = UNORDERED; + break; + case NE: /* NE - ZF=0 - fails on unordered */ + *first_code = LTGT; + *second_code = UNORDERED; + break; + case UNGE: /* GEU - CF=0 - fails on unordered */ + *first_code = GE; + *second_code = UNORDERED; + break; + case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ + *first_code = GT; + *second_code = UNORDERED; + break; + default: + gcc_unreachable (); + } + if (!TARGET_IEEE_FP) + { + *second_code = UNKNOWN; + *bypass_code = UNKNOWN; + } +} + +/* Return the cost of a comparison done with fcom + arithmetic operations on AX. + All the following functions use the number of instructions as the cost metric. + In the future this should be tweaked to compute bytes for optimize_size and + take into account the performance of various instructions on various CPUs. */ +static int +ix86_fp_comparison_arithmetics_cost (enum rtx_code code) +{ + if (!TARGET_IEEE_FP) + return 4; + /* The cost of code output by ix86_expand_fp_compare. */ + switch (code) + { + case UNLE: + case UNLT: + case LTGT: + case GT: + case GE: + case UNORDERED: + case ORDERED: + case UNEQ: + return 4; + break; + case LT: + case NE: + case EQ: + case UNGE: + return 5; + break; + case LE: + case UNGT: + return 6; + break; + default: + gcc_unreachable (); + } +} + +/* Return the cost of a comparison done using the fcomi instruction. + See ix86_fp_comparison_arithmetics_cost for the metrics. */ +static int +ix86_fp_comparison_fcomi_cost (enum rtx_code code) +{ + enum rtx_code bypass_code, first_code, second_code; + /* Return an arbitrarily high cost when the instruction is not supported - this + prevents gcc from using it. */ + if (!TARGET_CMOVE) + return 1024; + ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); + return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; +} + +/* Return the cost of a comparison done using the sahf instruction. + See ix86_fp_comparison_arithmetics_cost for the metrics. */ +static int +ix86_fp_comparison_sahf_cost (enum rtx_code code) +{ + enum rtx_code bypass_code, first_code, second_code; + /* Return an arbitrarily high cost when the instruction is not preferred - this + prevents gcc from using it. */ + if (!TARGET_USE_SAHF && !optimize_size) + return 1024; + ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); + return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; +} + +/* Compute the cost of the comparison using the cheapest of the available + methods. See ix86_fp_comparison_arithmetics_cost for the metrics. 
*/ +static int +ix86_fp_comparison_cost (enum rtx_code code) +{ + int fcomi_cost, sahf_cost, arithmetics_cost = 1024; + int min; + + fcomi_cost = ix86_fp_comparison_fcomi_cost (code); + sahf_cost = ix86_fp_comparison_sahf_cost (code); + + min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); + if (min > sahf_cost) + min = sahf_cost; + if (min > fcomi_cost) + min = fcomi_cost; + return min; +} + +/* Generate insn patterns to do a floating point compare of OPERANDS. */ + +static rtx +ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, + rtx *second_test, rtx *bypass_test) +{ + enum machine_mode fpcmp_mode, intcmp_mode; + rtx tmp, tmp2; + int cost = ix86_fp_comparison_cost (code); + enum rtx_code bypass_code, first_code, second_code; + + fpcmp_mode = ix86_fp_compare_mode (code); + code = ix86_prepare_fp_compare_args (code, &op0, &op1); + + if (second_test) + *second_test = NULL_RTX; + if (bypass_test) + *bypass_test = NULL_RTX; + + ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); + + /* Do fcomi/sahf based test when profitable. */ + if ((bypass_code == UNKNOWN || bypass_test) + && (second_code == UNKNOWN || second_test) + && ix86_fp_comparison_arithmetics_cost (code) > cost) + { + if (TARGET_CMOVE) + { + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); + tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), + tmp); + emit_insn (tmp); + } + else + { + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); + tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); + if (!scratch) + scratch = gen_reg_rtx (HImode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); + emit_insn (gen_x86_sahf_1 (scratch)); + } + + /* The FP codes work out to act like unsigned. */ + intcmp_mode = fpcmp_mode; + code = first_code; + if (bypass_code != UNKNOWN) + *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, + gen_rtx_REG (intcmp_mode, FLAGS_REG), + const0_rtx); + if (second_code != UNKNOWN) + *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, + gen_rtx_REG (intcmp_mode, FLAGS_REG), + const0_rtx); + } + else + { + /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); + tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); + if (!scratch) + scratch = gen_reg_rtx (HImode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); + + /* In the unordered case, we have to check C2 for NaN's, which + doesn't happen to work out to anything nice combination-wise. + So do some bit twiddling on the value we've got in AH to come + up with an appropriate set of condition codes. 
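+ +   (Reference note added in review, not part of the original patch.) +   After fnstsw the FPU condition bits sit in AH as C0 = 0x01, +   C2 = 0x04 and C3 = 0x40; 0x45 is the union of the three.  An fcom +   style compare leaves + +     a > b      (ah & 0x45) == 0x00 +     a < b      (ah & 0x45) == 0x01 +     a == b     (ah & 0x45) == 0x40 +     unordered  (ah & 0x45) == 0x45 + +   and the cases below pick test/and/cmp sequences that turn each +   pattern into a plain integer condition.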
*/ + + intcmp_mode = CCNOmode; + switch (code) + { + case GT: + case UNGT: + if (code == GT || !TARGET_IEEE_FP) + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); + code = EQ; + } + else + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); + intcmp_mode = CCmode; + code = GEU; + } + break; + case LT: + case UNLT: + if (code == LT && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); + intcmp_mode = CCmode; + code = EQ; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); + code = NE; + } + break; + case GE: + case UNGE: + if (code == GE || !TARGET_IEEE_FP) + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); + code = EQ; + } + else + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, + GEN_INT (0x01))); + code = NE; + } + break; + case LE: + case UNLE: + if (code == LE && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); + intcmp_mode = CCmode; + code = LTU; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); + code = NE; + } + break; + case EQ: + case UNEQ: + if (code == EQ && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); + intcmp_mode = CCmode; + code = EQ; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); + code = NE; + break; + } + break; + case NE: + case LTGT: + if (code == NE && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, + GEN_INT (0x40))); + code = NE; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); + code = EQ; + } + break; + + case UNORDERED: + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); + code = NE; + break; + case ORDERED: + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); + code = EQ; + break; + + default: + gcc_unreachable (); + } + } + + /* Return the test that should be put into the flags user, i.e. + the bcc, scc, or cmov instruction. */ + return gen_rtx_fmt_ee (code, VOIDmode, + gen_rtx_REG (intcmp_mode, FLAGS_REG), + const0_rtx); +} + +rtx +ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) +{ + rtx op0, op1, ret; + op0 = ix86_compare_op0; + op1 = ix86_compare_op1; + + if (second_test) + *second_test = NULL_RTX; + if (bypass_test) + *bypass_test = NULL_RTX; + + if (ix86_compare_emitted) + { + ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx); + ix86_compare_emitted = NULL_RTX; + } + else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) + ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, + second_test, bypass_test); + else + ret = ix86_expand_int_compare (code, op0, op1); + + return ret; +} + +/* Return true if the CODE will result in nontrivial jump sequence. 
*/ +bool +ix86_fp_jump_nontrivial_p (enum rtx_code code) +{ + enum rtx_code bypass_code, first_code, second_code; + if (!TARGET_CMOVE) + return true; + ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); + return bypass_code != UNKNOWN || second_code != UNKNOWN; +} + +void +ix86_expand_branch (enum rtx_code code, rtx label) +{ + rtx tmp; + + /* If we have emitted a compare insn, go straight to simple. + ix86_expand_compare won't emit anything if ix86_compare_emitted + is non NULL. */ + if (ix86_compare_emitted) + goto simple; + + switch (GET_MODE (ix86_compare_op0)) + { + case QImode: + case HImode: + case SImode: + simple: + tmp = ix86_expand_compare (code, NULL, NULL); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + return; + + case SFmode: + case DFmode: + case XFmode: + { + rtvec vec; + int use_fcomi; + enum rtx_code bypass_code, first_code, second_code; + + code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, + &ix86_compare_op1); + + ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); + + /* Check whether we will use the natural sequence with one jump. If + so, we can expand jump early. Otherwise delay expansion by + creating compound insn to not confuse optimizers. */ + if (bypass_code == UNKNOWN && second_code == UNKNOWN + && TARGET_CMOVE) + { + ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx, NULL_RTX, NULL_RTX); + } + else + { + tmp = gen_rtx_fmt_ee (code, VOIDmode, + ix86_compare_op0, ix86_compare_op1); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); + + use_fcomi = ix86_use_fcomi_compare (code); + vec = rtvec_alloc (3 + !use_fcomi); + RTVEC_ELT (vec, 0) = tmp; + RTVEC_ELT (vec, 1) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18)); + RTVEC_ELT (vec, 2) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17)); + if (! use_fcomi) + RTVEC_ELT (vec, 3) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); + + emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); + } + return; + } + + case DImode: + if (TARGET_64BIT) + goto simple; + case TImode: + /* Expand DImode branch into multiple compare+branch. */ + { + rtx lo[2], hi[2], label2; + enum rtx_code code1, code2, code3; + enum machine_mode submode; + + if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) + { + tmp = ix86_compare_op0; + ix86_compare_op0 = ix86_compare_op1; + ix86_compare_op1 = tmp; + code = swap_condition (code); + } + if (GET_MODE (ix86_compare_op0) == DImode) + { + split_di (&ix86_compare_op0, 1, lo+0, hi+0); + split_di (&ix86_compare_op1, 1, lo+1, hi+1); + submode = SImode; + } + else + { + split_ti (&ix86_compare_op0, 1, lo+0, hi+0); + split_ti (&ix86_compare_op1, 1, lo+1, hi+1); + submode = DImode; + } + + /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to + avoid two branches. This costs one extra insn, so disable when + optimizing for size. 
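+ +   (Illustrative note added in review, not part of the original patch.) +   In scalar C terms the transformation is + +     a == b   becomes   ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0 + +   with either XOR dropping out when the corresponding word of a +   constant operand is zero, which is why the optimize_size test above +   still allows that case.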
*/ + + if ((code == EQ || code == NE) + && (!optimize_size + || hi[1] == const0_rtx || lo[1] == const0_rtx)) + { + rtx xor0, xor1; + + xor1 = hi[0]; + if (hi[1] != const0_rtx) + xor1 = expand_binop (submode, xor_optab, xor1, hi[1], + NULL_RTX, 0, OPTAB_WIDEN); + + xor0 = lo[0]; + if (lo[1] != const0_rtx) + xor0 = expand_binop (submode, xor_optab, xor0, lo[1], + NULL_RTX, 0, OPTAB_WIDEN); + + tmp = expand_binop (submode, ior_optab, xor1, xor0, + NULL_RTX, 0, OPTAB_WIDEN); + + ix86_compare_op0 = tmp; + ix86_compare_op1 = const0_rtx; + ix86_expand_branch (code, label); + return; + } + + /* Otherwise, if we are doing less-than or greater-or-equal-than, + op1 is a constant and the low word is zero, then we can just + examine the high word. */ + + if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx) + switch (code) + { + case LT: case LTU: case GE: case GEU: + ix86_compare_op0 = hi[0]; + ix86_compare_op1 = hi[1]; + ix86_expand_branch (code, label); + return; + default: + break; + } + + /* Otherwise, we need two or three jumps. */ + + label2 = gen_label_rtx (); + + code1 = code; + code2 = swap_condition (code); + code3 = unsigned_condition (code); + + switch (code) + { + case LT: case GT: case LTU: case GTU: + break; + + case LE: code1 = LT; code2 = GT; break; + case GE: code1 = GT; code2 = LT; break; + case LEU: code1 = LTU; code2 = GTU; break; + case GEU: code1 = GTU; code2 = LTU; break; + + case EQ: code1 = UNKNOWN; code2 = NE; break; + case NE: code2 = UNKNOWN; break; + + default: + gcc_unreachable (); + } + + /* + * a < b => + * if (hi(a) < hi(b)) goto true; + * if (hi(a) > hi(b)) goto false; + * if (lo(a) < lo(b)) goto true; + * false: + */ + + ix86_compare_op0 = hi[0]; + ix86_compare_op1 = hi[1]; + + if (code1 != UNKNOWN) + ix86_expand_branch (code1, label); + if (code2 != UNKNOWN) + ix86_expand_branch (code2, label2); + + ix86_compare_op0 = lo[0]; + ix86_compare_op1 = lo[1]; + ix86_expand_branch (code3, label); + + if (code2 != UNKNOWN) + emit_label (label2); + return; + } + + default: + gcc_unreachable (); + } +} + +/* Split branch based on floating point condition. */ +void +ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, + rtx target1, rtx target2, rtx tmp, rtx pushed) +{ + rtx second, bypass; + rtx label = NULL_RTX; + rtx condition; + int bypass_probability = -1, second_probability = -1, probability = -1; + rtx i; + + if (target2 != pc_rtx) + { + rtx tmp = target2; + code = reverse_condition_maybe_unordered (code); + target2 = target1; + target1 = tmp; + } + + condition = ix86_expand_fp_compare (code, op1, op2, + tmp, &second, &bypass); + + /* Remove pushed operand from stack. */ + if (pushed) + ix86_free_from_memory (GET_MODE (pushed)); + + if (split_branch_probability >= 0) + { + /* Distribute the probabilities across the jumps. + Assume the BYPASS and SECOND to be always test + for UNORDERED. */ + probability = split_branch_probability; + + /* Value of 1 is low enough to make no need for probability + to be updated. Later we may run some experiments and see + if unordered values are more frequent in practice. 
*/ + if (bypass) + bypass_probability = 1; + if (second) + second_probability = 1; + } + if (bypass != NULL_RTX) + { + label = gen_label_rtx (); + i = emit_jump_insn (gen_rtx_SET + (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + bypass, + gen_rtx_LABEL_REF (VOIDmode, + label), + pc_rtx))); + if (bypass_probability >= 0) + REG_NOTES (i) + = gen_rtx_EXPR_LIST (REG_BR_PROB, + GEN_INT (bypass_probability), + REG_NOTES (i)); + } + i = emit_jump_insn (gen_rtx_SET + (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + condition, target1, target2))); + if (probability >= 0) + REG_NOTES (i) + = gen_rtx_EXPR_LIST (REG_BR_PROB, + GEN_INT (probability), + REG_NOTES (i)); + if (second != NULL_RTX) + { + i = emit_jump_insn (gen_rtx_SET + (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, + target2))); + if (second_probability >= 0) + REG_NOTES (i) + = gen_rtx_EXPR_LIST (REG_BR_PROB, + GEN_INT (second_probability), + REG_NOTES (i)); + } + if (label != NULL_RTX) + emit_label (label); +} + +int +ix86_expand_setcc (enum rtx_code code, rtx dest) +{ + rtx ret, tmp, tmpreg, equiv; + rtx second_test, bypass_test; + + if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode)) + return 0; /* FAIL */ + + gcc_assert (GET_MODE (dest) == QImode); + + ret = ix86_expand_compare (code, &second_test, &bypass_test); + PUT_MODE (ret, QImode); + + tmp = dest; + tmpreg = dest; + + emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); + if (bypass_test || second_test) + { + rtx test = second_test; + int bypass = 0; + rtx tmp2 = gen_reg_rtx (QImode); + if (bypass_test) + { + gcc_assert (!second_test); + test = bypass_test; + bypass = 1; + PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); + } + PUT_MODE (test, QImode); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); + + if (bypass) + emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); + else + emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); + } + + /* Attach a REG_EQUAL note describing the comparison result. */ + if (ix86_compare_op0 && ix86_compare_op1) + { + equiv = simplify_gen_relational (code, QImode, + GET_MODE (ix86_compare_op0), + ix86_compare_op0, ix86_compare_op1); + set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); + } + + return 1; /* DONE */ +} + +/* Expand a comparison setting or clearing the carry flag. Return true when + successful, setting *POP to the comparison operation. */ +static bool +ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) +{ + enum machine_mode mode = + GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); + + /* Do not handle DImode compares, which go through a special path. Also we + can't deal with FP compares yet. This is possible to add. */ + if (mode == (TARGET_64BIT ? TImode : DImode)) + return false; + if (FLOAT_MODE_P (mode)) + { + rtx second_test = NULL, bypass_test = NULL; + rtx compare_op, compare_seq; + + /* Shortcut: the following common codes never translate into carry flag compares. */ + if (code == EQ || code == NE || code == UNEQ || code == LTGT + || code == ORDERED || code == UNORDERED) + return false; + + /* These comparisons require the zero flag; swap the operands so they won't. */ + if ((code == GT || code == UNLE || code == LE || code == UNGT) + && !TARGET_IEEE_FP) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + code = swap_condition (code); + } + + /* Try to expand the comparison and verify that we end up with a carry flag + based comparison. This fails to be true only when we decide to expand the + comparison using arithmetic, which is not a very common scenario. 
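+ +   (Illustrative note added in review, not part of the original patch.) +   The integer cases further below canonicalize everything towards +   LTU/GEU, so that a single cmp leaves the answer in the carry flag: + +     a == 0    becomes   (unsigned) a < 1 +     a >  b    becomes   b < a, or a >= b + 1 for constant b +     a >= 0    becomes   (unsigned) a < 0x80000000 +     a > -1    becomes   (unsigned) a < 0x80000000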
*/ + start_sequence (); + compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, + &second_test, &bypass_test); + compare_seq = get_insns (); + end_sequence (); + + if (second_test || bypass_test) + return false; + if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode + || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) + code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); + else + code = GET_CODE (compare_op); + if (code != LTU && code != GEU) + return false; + emit_insn (compare_seq); + *pop = compare_op; + return true; + } + if (!INTEGRAL_MODE_P (mode)) + return false; + switch (code) + { + case LTU: + case GEU: + break; + + /* Convert a==0 into (unsigned)a<1. */ + case EQ: + case NE: + if (op1 != const0_rtx) + return false; + op1 = const1_rtx; + code = (code == EQ ? LTU : GEU); + break; + + /* Convert a>b into b<a or a>=b-1. */ + case GTU: + case LEU: + if (GET_CODE (op1) == CONST_INT) + { + op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); + /* Bail out on overflow. We still can swap operands but that + would force loading of the constant into register. */ + if (op1 == const0_rtx + || !x86_64_immediate_operand (op1, GET_MODE (op1))) + return false; + code = (code == GTU ? GEU : LTU); + } + else + { + rtx tmp = op1; + op1 = op0; + op0 = tmp; + code = (code == GTU ? LTU : GEU); + } + break; + + /* Convert a>=0 into (unsigned)a<0x80000000. */ + case LT: + case GE: + if (mode == DImode || op1 != const0_rtx) + return false; + op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); + code = (code == LT ? GEU : LTU); + break; + case LE: + case GT: + if (mode == DImode || op1 != constm1_rtx) + return false; + op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); + code = (code == LE ? GEU : LTU); + break; + + default: + return false; + } + /* Swapping operands may cause constant to appear as first operand. */ + if (!nonimmediate_operand (op0, VOIDmode)) + { + if (no_new_pseudos) + return false; + op0 = force_reg (mode, op0); + } + ix86_compare_op0 = op0; + ix86_compare_op1 = op1; + *pop = ix86_expand_compare (code, NULL, NULL); + gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); + return true; +} + +int +ix86_expand_int_movcc (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]), compare_code; + rtx compare_seq, compare_op; + rtx second_test, bypass_test; + enum machine_mode mode = GET_MODE (operands[0]); + bool sign_bit_compare_p = false;; + + start_sequence (); + compare_op = ix86_expand_compare (code, &second_test, &bypass_test); + compare_seq = get_insns (); + end_sequence (); + + compare_code = GET_CODE (compare_op); + + if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) + || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) + sign_bit_compare_p = true; + + /* Don't attempt mode expansion here -- if we had to expand 5 or 6 + HImode insns, we'd be swallowed in word prefix ops. */ + + if ((mode != HImode || TARGET_FAST_PREFIX) + && (mode != (TARGET_64BIT ? TImode : DImode)) + && GET_CODE (operands[2]) == CONST_INT + && GET_CODE (operands[3]) == CONST_INT) + { + rtx out = operands[0]; + HOST_WIDE_INT ct = INTVAL (operands[2]); + HOST_WIDE_INT cf = INTVAL (operands[3]); + HOST_WIDE_INT diff; + + diff = ct - cf; + /* Sign bit compares are better done using shifts than we do by using + sbb. */ + if (sign_bit_compare_p + || ix86_expand_carry_flag_compare (code, ix86_compare_op0, + ix86_compare_op1, &compare_op)) + { + /* Detect overlap between destination and compare sources. 
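+ +   (Illustrative note added in review, not part of the original patch; +   mask and result are ad hoc names.)  The sbb idiom used below +   computes the select without a branch; in scalar C terms: + +     mask   = -(unsigned) (a < b);       what sbb dest,dest leaves +     result = cf + ((ct - cf) & mask);   ct if mask is all ones, else cf + +   with the special cases diff == 1, cf == -1 and diff == -1 collapsing +   the and/add pair into a single add, ior or not instruction.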
*/ + rtx tmp = out; + + if (!sign_bit_compare_p) + { + bool fpcmp = false; + + compare_code = GET_CODE (compare_op); + + if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode + || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) + { + fpcmp = true; + compare_code = ix86_fp_compare_code_to_integer (compare_code); + } + + /* To simplify rest of code, restrict to the GEU case. */ + if (compare_code == LTU) + { + HOST_WIDE_INT tmp = ct; + ct = cf; + cf = tmp; + compare_code = reverse_condition (compare_code); + code = reverse_condition (code); + } + else + { + if (fpcmp) + PUT_CODE (compare_op, + reverse_condition_maybe_unordered + (GET_CODE (compare_op))); + else + PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); + } + diff = ct - cf; + + if (reg_overlap_mentioned_p (out, ix86_compare_op0) + || reg_overlap_mentioned_p (out, ix86_compare_op1)) + tmp = gen_reg_rtx (mode); + + if (mode == DImode) + emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); + else + emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); + } + else + { + if (code == GT || code == GE) + code = reverse_condition (code); + else + { + HOST_WIDE_INT tmp = ct; + ct = cf; + cf = tmp; + diff = ct - cf; + } + tmp = emit_store_flag (tmp, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, -1); + } + + if (diff == 1) + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * [addl dest, ct] + * + * Size 5 - 8. + */ + if (ct) + tmp = expand_simple_binop (mode, PLUS, + tmp, GEN_INT (ct), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + else if (cf == -1) + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * orl $ct, dest + * + * Size 8. + */ + tmp = expand_simple_binop (mode, IOR, + tmp, GEN_INT (ct), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + else if (diff == -1 && ct) + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * notl dest + * [addl dest, cf] + * + * Size 8 - 11. + */ + tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); + if (cf) + tmp = expand_simple_binop (mode, PLUS, + copy_rtx (tmp), GEN_INT (cf), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + else + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * [notl dest] + * andl cf - ct, dest + * [addl dest, ct] + * + * Size 8 - 11. + */ + + if (cf == 0) + { + cf = ct; + ct = 0; + tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); + } + + tmp = expand_simple_binop (mode, AND, + copy_rtx (tmp), + gen_int_mode (cf - ct, mode), + copy_rtx (tmp), 1, OPTAB_DIRECT); + if (ct) + tmp = expand_simple_binop (mode, PLUS, + copy_rtx (tmp), GEN_INT (ct), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + + if (!rtx_equal_p (tmp, out)) + emit_move_insn (copy_rtx (out), copy_rtx (tmp)); + + return 1; /* DONE */ + } + + if (diff < 0) + { + HOST_WIDE_INT tmp; + tmp = ct, ct = cf, cf = tmp; + diff = -diff; + if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) + { + /* We may be reversing unordered compare to normal compare, that + is not valid in general (we may convert non-trapping condition + to trapping one), however on i386 we currently emit all + comparisons unordered. 
*/ + compare_code = reverse_condition_maybe_unordered (compare_code); + code = reverse_condition_maybe_unordered (code); + } + else + { + compare_code = reverse_condition (compare_code); + code = reverse_condition (code); + } + } + + compare_code = UNKNOWN; + if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT + && GET_CODE (ix86_compare_op1) == CONST_INT) + { + if (ix86_compare_op1 == const0_rtx + && (code == LT || code == GE)) + compare_code = code; + else if (ix86_compare_op1 == constm1_rtx) + { + if (code == LE) + compare_code = LT; + else if (code == GT) + compare_code = GE; + } + } + + /* Optimize dest = (op0 < 0) ? -1 : cf. */ + if (compare_code != UNKNOWN + && GET_MODE (ix86_compare_op0) == GET_MODE (out) + && (cf == -1 || ct == -1)) + { + /* If lea code below could be used, only optimize + if it results in a 2 insn sequence. */ + + if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 + || diff == 3 || diff == 5 || diff == 9) + || (compare_code == LT && ct == -1) + || (compare_code == GE && cf == -1)) + { + /* + * notl op1 (if necessary) + * sarl $31, op1 + * orl cf, op1 + */ + if (ct != -1) + { + cf = ct; + ct = -1; + code = reverse_condition (code); + } + + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, -1); + + out = expand_simple_binop (mode, IOR, + out, GEN_INT (cf), + out, 1, OPTAB_DIRECT); + if (out != operands[0]) + emit_move_insn (operands[0], out); + + return 1; /* DONE */ + } + } + + + if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 + || diff == 3 || diff == 5 || diff == 9) + && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) + && (mode != DImode + || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) + { + /* + * xorl dest,dest + * cmpl op1,op2 + * setcc dest + * lea cf(dest*(ct-cf)),dest + * + * Size 14. + * + * This also catches the degenerate setcc-only case. + */ + + rtx tmp; + int nops; + + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, 1); + + nops = 0; + /* On x86_64 the lea instruction operates on Pmode, so we need + to get arithmetics done in proper mode to match. */ + if (diff == 1) + tmp = copy_rtx (out); + else + { + rtx out1; + out1 = copy_rtx (out); + tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); + nops++; + if (diff & 1) + { + tmp = gen_rtx_PLUS (mode, tmp, out1); + nops++; + } + } + if (cf != 0) + { + tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); + nops++; + } + if (!rtx_equal_p (tmp, out)) + { + if (nops == 1) + out = force_operand (tmp, copy_rtx (out)); + else + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); + } + if (!rtx_equal_p (out, operands[0])) + emit_move_insn (operands[0], copy_rtx (out)); + + return 1; /* DONE */ + } + + /* + * General case: Jumpful: + * xorl dest,dest cmpl op1, op2 + * cmpl op1, op2 movl ct, dest + * setcc dest jcc 1f + * decl dest movl cf, dest + * andl (cf-ct),dest 1: + * addl ct,dest + * + * Size 20. Size 14. + * + * This is reasonably steep, but branch mispredict costs are + * high on modern cpus, so consider failing only if optimizing + * for space. + */ + + if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) + && BRANCH_COST >= 2) + { + if (cf == 0) + { + cf = ct; + ct = 0; + if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) + /* We may be reversing unordered compare to normal compare, + that is not valid in general (we may convert non-trapping + condition to trapping one), however on i386 we currently + emit all comparisons unordered. 
*/ + code = reverse_condition_maybe_unordered (code); + else + { + code = reverse_condition (code); + if (compare_code != UNKNOWN) + compare_code = reverse_condition (compare_code); + } + } + + if (compare_code != UNKNOWN) + { + /* notl op1 (if needed) + sarl $31, op1 + andl (cf-ct), op1 + addl ct, op1 + + For x < 0 (resp. x <= -1) there will be no notl, + so if possible swap the constants to get rid of the + complement. + True/false will be -1/0 while code below (store flag + followed by decrement) is 0/-1, so the constants need + to be exchanged once more. */ + + if (compare_code == GE || !cf) + { + code = reverse_condition (code); + compare_code = LT; + } + else + { + HOST_WIDE_INT tmp = cf; + cf = ct; + ct = tmp; + } + + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, -1); + } + else + { + out = emit_store_flag (out, code, ix86_compare_op0, + ix86_compare_op1, VOIDmode, 0, 1); + + out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, + copy_rtx (out), 1, OPTAB_DIRECT); + } + + out = expand_simple_binop (mode, AND, copy_rtx (out), + gen_int_mode (cf - ct, mode), + copy_rtx (out), 1, OPTAB_DIRECT); + if (ct) + out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), + copy_rtx (out), 1, OPTAB_DIRECT); + if (!rtx_equal_p (out, operands[0])) + emit_move_insn (operands[0], copy_rtx (out)); + + return 1; /* DONE */ + } + } + + if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) + { + /* Try a few things more with specific constants and a variable. */ + + optab op; + rtx var, orig_out, out, tmp; + + if (BRANCH_COST <= 2) + return 0; /* FAIL */ + + /* If one of the two operands is an interesting constant, load a + constant with the above and mask it in with a logical operation. */ + + if (GET_CODE (operands[2]) == CONST_INT) + { + var = operands[3]; + if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) + operands[3] = constm1_rtx, op = and_optab; + else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) + operands[3] = const0_rtx, op = ior_optab; + else + return 0; /* FAIL */ + } + else if (GET_CODE (operands[3]) == CONST_INT) + { + var = operands[2]; + if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) + operands[2] = constm1_rtx, op = and_optab; + else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) + operands[2] = const0_rtx, op = ior_optab; + else + return 0; /* FAIL */ + } + else + return 0; /* FAIL */ + + orig_out = operands[0]; + tmp = gen_reg_rtx (mode); + operands[0] = tmp; + + /* Recurse to get the constant loaded. */ + if (ix86_expand_int_movcc (operands) == 0) + return 0; /* FAIL */ + + /* Mask in the interesting variable. */ + out = expand_binop (mode, op, var, tmp, orig_out, 0, + OPTAB_WIDEN); + if (!rtx_equal_p (out, orig_out)) + emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); + + return 1; /* DONE */ + } + + /* + * For comparison with above, + * + * movl cf,dest + * movl ct,tmp + * cmpl op1,op2 + * cmovcc tmp,dest + * + * Size 15. + */ + + if (! nonimmediate_operand (operands[2], mode)) + operands[2] = force_reg (mode, operands[2]); + if (! 
nonimmediate_operand (operands[3], mode)) + operands[3] = force_reg (mode, operands[3]); + + if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, operands[3]); + operands[3] = tmp; + } + if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) + { + rtx tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, operands[2]); + operands[2] = tmp; + } + + if (! register_operand (operands[2], VOIDmode) + && (mode == QImode + || ! register_operand (operands[3], VOIDmode))) + operands[2] = force_reg (mode, operands[2]); + + if (mode == QImode + && ! register_operand (operands[3], VOIDmode)) + operands[3] = force_reg (mode, operands[3]); + + emit_insn (compare_seq); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_IF_THEN_ELSE (mode, + compare_op, operands[2], + operands[3]))); + if (bypass_test) + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), + gen_rtx_IF_THEN_ELSE (mode, + bypass_test, + copy_rtx (operands[3]), + copy_rtx (operands[0])))); + if (second_test) + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), + gen_rtx_IF_THEN_ELSE (mode, + second_test, + copy_rtx (operands[2]), + copy_rtx (operands[0])))); + + return 1; /* DONE */ +} + +/* Swap, force into registers, or otherwise massage the two operands + to an sse comparison with a mask result. Thus we differ a bit from + ix86_prepare_fp_compare_args which expects to produce a flags result. + + The DEST operand exists to help determine whether to commute commutative + operators. The POP0/POP1 operands are updated in place. The new + comparison code is returned, or UNKNOWN if not implementable. */ + +static enum rtx_code +ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, + rtx *pop0, rtx *pop1) +{ + rtx tmp; + + switch (code) + { + case LTGT: + case UNEQ: + /* We have no LTGT as an operator. We could implement it with + NE & ORDERED, but this requires an extra temporary. It's + not clear that it's worth it. */ + return UNKNOWN; + + case LT: + case LE: + case UNGT: + case UNGE: + /* These are supported directly. */ + break; + + case EQ: + case NE: + case UNORDERED: + case ORDERED: + /* For commutative operators, try to canonicalize the destination + operand to be first in the comparison - this helps reload to + avoid extra moves. */ + if (!dest || !rtx_equal_p (dest, *pop1)) + break; + /* FALLTHRU */ + + case GE: + case GT: + case UNLE: + case UNLT: + /* These are not supported directly. Swap the comparison operands + to transform into something that is supported. */ + tmp = *pop0; + *pop0 = *pop1; + *pop1 = tmp; + code = swap_condition (code); + break; + + default: + gcc_unreachable (); + } + + return code; +} + +/* Detect conditional moves that exactly match min/max operational + semantics. Note that this is IEEE safe, as long as we don't + interchange the operands. + + Returns FALSE if this conditional move doesn't match a MIN/MAX, + and TRUE if the operation is successful and instructions are emitted. 
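+ +   (Reference note added in review, not part of the original patch.) +   SSE minsd/maxsd are not commutative once NaNs or signed zeros are +   involved - on an unordered compare they return the second source +   operand - so the match below only fires when the cond-move operands +   line up with the compare operands exactly, and the result is wrapped +   in UNSPEC_IEEE_MIN or UNSPEC_IEEE_MAX unless -ffinite-math-only and +   -funsafe-math-optimizations allow the commutable SMIN/SMAX form.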
*/ + +static bool +ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, + rtx cmp_op1, rtx if_true, rtx if_false) +{ + enum machine_mode mode; + bool is_min; + rtx tmp; + + if (code == LT) + ; + else if (code == UNGE) + { + tmp = if_true; + if_true = if_false; + if_false = tmp; + } + else + return false; + + if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) + is_min = true; + else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) + is_min = false; + else + return false; + + mode = GET_MODE (dest); + + /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, + but MODE may be a vector mode and thus not appropriate. */ + if (!flag_finite_math_only || !flag_unsafe_math_optimizations) + { + int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; + rtvec v; + + if_true = force_reg (mode, if_true); + v = gen_rtvec (2, if_true, if_false); + tmp = gen_rtx_UNSPEC (mode, v, u); + } + else + { + code = is_min ? SMIN : SMAX; + tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); + } + + emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); + return true; +} + +/* Expand an sse vector comparison. Return the register with the result. */ + +static rtx +ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, + rtx op_true, rtx op_false) +{ + enum machine_mode mode = GET_MODE (dest); + rtx x; + + cmp_op0 = force_reg (mode, cmp_op0); + if (!nonimmediate_operand (cmp_op1, mode)) + cmp_op1 = force_reg (mode, cmp_op1); + + if (optimize + || reg_overlap_mentioned_p (dest, op_true) + || reg_overlap_mentioned_p (dest, op_false)) + dest = gen_reg_rtx (mode); + + x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + + return dest; +} + +/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical + operations. This is used for both scalar and vector conditional moves. */ + +static void +ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) +{ + enum machine_mode mode = GET_MODE (dest); + rtx t2, t3, x; + + if (op_false == CONST0_RTX (mode)) + { + op_true = force_reg (mode, op_true); + x = gen_rtx_AND (mode, cmp, op_true); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } + else if (op_true == CONST0_RTX (mode)) + { + op_false = force_reg (mode, op_false); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } + else + { + op_true = force_reg (mode, op_true); + op_false = force_reg (mode, op_false); + + t2 = gen_reg_rtx (mode); + if (optimize) + t3 = gen_reg_rtx (mode); + else + t3 = dest; + + x = gen_rtx_AND (mode, op_true, cmp); + emit_insn (gen_rtx_SET (VOIDmode, t2, x)); + + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, t3, x)); + + x = gen_rtx_IOR (mode, t3, t2); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } +} + +/* Expand a floating-point conditional move. Return true if successful. */ + +int +ix86_expand_fp_movcc (rtx operands[]) +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp, compare_op, second_test, bypass_test; + + if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) + { + enum machine_mode cmode; + + /* Since we've no cmove for sse registers, don't force bad register + allocation just to gain access to it. Deny movcc when the + comparison mode doesn't match the move mode. 
*/ + cmode = GET_MODE (ix86_compare_op0); + if (cmode == VOIDmode) + cmode = GET_MODE (ix86_compare_op1); + if (cmode != mode) + return 0; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &ix86_compare_op0, + &ix86_compare_op1); + if (code == UNKNOWN) + return 0; + + if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, + ix86_compare_op1, operands[2], + operands[3])) + return 1; + + tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, + ix86_compare_op1, operands[2], operands[3]); + ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); + return 1; + } + + /* The floating point conditional move instructions don't directly + support conditions resulting from a signed integer comparison. */ + + compare_op = ix86_expand_compare (code, &second_test, &bypass_test); + + /* The floating point conditional move instructions don't directly + support signed integer comparisons. */ + + if (!fcmov_comparison_operator (compare_op, VOIDmode)) + { + gcc_assert (!second_test && !bypass_test); + tmp = gen_reg_rtx (QImode); + ix86_expand_setcc (code, tmp); + code = NE; + ix86_compare_op0 = tmp; + ix86_compare_op1 = const0_rtx; + compare_op = ix86_expand_compare (code, &second_test, &bypass_test); + } + if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, operands[3]); + operands[3] = tmp; + } + if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) + { + tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, operands[2]); + operands[2] = tmp; + } + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_IF_THEN_ELSE (mode, compare_op, + operands[2], operands[3]))); + if (bypass_test) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_IF_THEN_ELSE (mode, bypass_test, + operands[3], operands[0]))); + if (second_test) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_IF_THEN_ELSE (mode, second_test, + operands[2], operands[0]))); + + return 1; +} + +/* Expand a floating-point vector conditional move; a vcond operation + rather than a movcc operation. */ + +bool +ix86_expand_fp_vcond (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[3]); + rtx cmp; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &operands[4], &operands[5]); + if (code == UNKNOWN) + return false; + + if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], + operands[5], operands[1], operands[2])) + return true; + + cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], + operands[1], operands[2]); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; +} + +/* Expand a signed integral vector conditional move. */ + +bool +ix86_expand_int_vcond (rtx operands[]) +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate = false; + rtx x, cop0, cop1; + + cop0 = operands[4]; + cop1 = operands[5]; + + /* Canonicalize the comparison to EQ, GT, GTU. */ + switch (code) + { + case EQ: + case GT: + case GTU: + break; + + case NE: + case LE: + case LEU: + code = reverse_condition (code); + negate = true; + break; + + case GE: + case GEU: + code = reverse_condition (code); + negate = true; + /* FALLTHRU */ + + case LT: + case LTU: + code = swap_condition (code); + x = cop0, cop0 = cop1, cop1 = x; + break; + + default: + gcc_unreachable (); + } + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* Only SSE4.1/SSE4.2 supports V2DImode. 
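+ +   (Illustrative note added in review, not part of the original patch; +   sat_sub stands for unsigned saturating subtraction.)  For the QI/HI +   element cases the GTU trick played further below relies on + +     a > b (unsigned)   if and only if   sat_sub (a, b) != 0 + +   since a saturating a - b is zero exactly when a <= b unsigned, so +   the comparison becomes EQ against zero with the NEGATE flag flipped.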
*/ + if (mode == V2DImode) + { + switch (code) + { + case EQ: + /* SSE4.1 supports EQ. */ + if (!TARGET_SSE4_1) + return false; + break; + + case GT: + case GTU: + /* SSE4.2 supports GT/GTU. */ + if (!TARGET_SSE4_2) + return false; + break; + + default: + gcc_unreachable (); + } + } + /* APPLE LOCAL end 5612787 mainline sse4 */ + + /* Unsigned parallel compare is not supported by the hardware. Play some + tricks to turn this into a signed comparison against 0. */ + if (code == GTU) + { + cop0 = force_reg (mode, cop0); + + switch (mode) + { + case V4SImode: + { + rtx t1, t2, mask; + + /* Perform a parallel modulo subtraction. */ + t1 = gen_reg_rtx (mode); + emit_insn (gen_subv4si3 (t1, cop0, cop1)); + + /* Extract the original sign bit of op0. */ + mask = GEN_INT (-0x80000000); + mask = gen_rtx_CONST_VECTOR (mode, + gen_rtvec (4, mask, mask, mask, mask)); + mask = force_reg (mode, mask); + t2 = gen_reg_rtx (mode); + emit_insn (gen_andv4si3 (t2, cop0, mask)); + + /* XOR it back into the result of the subtraction. This results + in the sign bit set iff we saw unsigned underflow. */ + x = gen_reg_rtx (mode); + emit_insn (gen_xorv4si3 (x, t1, t2)); + + code = GT; + } + break; + + case V16QImode: + case V8HImode: + /* Perform a parallel unsigned saturating subtraction. */ + x = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, x, + gen_rtx_US_MINUS (mode, cop0, cop1))); + + code = EQ; + negate = !negate; + break; + + default: + gcc_unreachable (); + } + + cop0 = x; + cop1 = CONST0_RTX (mode); + } + + x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, + operands[1+negate], operands[2-negate]); + + ix86_expand_sse_movcc (operands[0], x, operands[1+negate], + operands[2-negate]); + return true; +} + +/* APPLE LOCAL begin 5612787 mainline sse4 */ +/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is + true if we should do zero extension, else sign extension. HIGH_P is + true if we want the N/2 high elements, else the low elements. */ + +void +ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) +{ + enum machine_mode imode = GET_MODE (operands[1]); + rtx (*unpack)(rtx, rtx, rtx); + rtx se, dest; + + switch (imode) + { + case V16QImode: + if (high_p) + unpack = gen_vec_interleave_highv16qi; + else + unpack = gen_vec_interleave_lowv16qi; + break; + case V8HImode: + if (high_p) + unpack = gen_vec_interleave_highv8hi; + else + unpack = gen_vec_interleave_lowv8hi; + break; + case V4SImode: + if (high_p) + unpack = gen_vec_interleave_highv4si; + else + unpack = gen_vec_interleave_lowv4si; + break; + default: + gcc_unreachable (); + } + + dest = gen_lowpart (imode, operands[0]); + + if (unsigned_p) + se = force_reg (imode, CONST0_RTX (imode)); + else + se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), + operands[1], pc_rtx, pc_rtx); + + emit_insn (unpack (dest, operands[1], se)); +} + +/* This function performs the same task as ix86_expand_sse_unpack, + but with SSE4.1 instructions. 
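+ +   (Reference note added in review, not part of the original patch.) +   pmovzx/pmovsx widen the low N/2 elements of their source directly, +   so for HIGH_P the source is first shifted right by 8 bytes (psrldq) +   to bring the high half down; with intrinsics, the unsigned high-half +   widening of a V16QI value v is roughly + +     _mm_cvtepu8_epi16 (_mm_srli_si128 (v, 8)) + +   whereas the pre-SSE4.1 routine above interleaves with zeros (zero +   extension) or with a pcmpgt-computed sign mask (sign extension).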
*/ + +void +ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) +{ + enum machine_mode imode = GET_MODE (operands[1]); + rtx (*unpack)(rtx, rtx); + rtx src, dest; + + switch (imode) + { + case V16QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv8qiv8hi2; + else + unpack = gen_sse4_1_extendv8qiv8hi2; + break; + case V8HImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv4hiv4si2; + else + unpack = gen_sse4_1_extendv4hiv4si2; + break; + case V4SImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2siv2di2; + else + unpack = gen_sse4_1_extendv2siv2di2; + break; + default: + gcc_unreachable (); + } + + dest = operands[0]; + if (high_p) + { + /* Shift higher 8 bytes to lower 8 bytes. */ + src = gen_reg_rtx (imode); + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), + gen_lowpart (TImode, operands[1]), + GEN_INT (64))); + } + else + src = operands[1]; + + emit_insn (unpack (dest, src)); +} +/* APPLE LOCAL end 5612787 mainline sse4 */ + +/* Expand conditional increment or decrement using adc/sbb instructions. + The default case using setcc followed by the conditional move can be + done by generic code. */ +int +ix86_expand_int_addcc (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx compare_op; + rtx val = const0_rtx; + bool fpcmp = false; + enum machine_mode mode = GET_MODE (operands[0]); + + if (operands[3] != const1_rtx + && operands[3] != constm1_rtx) + return 0; + if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0, + ix86_compare_op1, &compare_op)) + return 0; + code = GET_CODE (compare_op); + + if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode + || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) + { + fpcmp = true; + code = ix86_fp_compare_code_to_integer (code); + } + + if (code != LTU) + { + val = constm1_rtx; + if (fpcmp) + PUT_CODE (compare_op, + reverse_condition_maybe_unordered + (GET_CODE (compare_op))); + else + PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); + } + PUT_MODE (compare_op, mode); + + /* Construct either adc or sbb insn. */ + if ((code == LTU) == (operands[3] == constm1_rtx)) + { + switch (GET_MODE (operands[0])) + { + case QImode: + emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); + break; + case HImode: + emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); + break; + case SImode: + emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); + break; + case DImode: + emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); + break; + default: + gcc_unreachable (); + } + } + else + { + switch (GET_MODE (operands[0])) + { + case QImode: + emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); + break; + case HImode: + emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); + break; + case SImode: + emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); + break; + case DImode: + emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); + break; + default: + gcc_unreachable (); + } + } + return 1; /* DONE */ +} + + +/* Split operands 0 and 1 into SImode parts. Similar to split_di, but + works for floating point parameters and non-offsettable memories. + For pushes, it returns just stack offsets; the values will be saved + in the right order. At most three parts are generated.
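ix86_expand_int_addcc above turns "x plus-or-minus (condition)" into a compare that leaves the condition in the carry flag, followed by a single adc/sbb, with no setcc and no branch. A scalar model of the transformation (illustrative, not part of the patch; the asm in the comments is only the rough shape of the emitted code):

    #include <stdint.h>
    #include <assert.h>

    /* Conditional increment: the compare leaves CF = (a <u b), then
       "adc $0, x" folds the flag straight into the addition.  */
    static uint32_t
    cond_inc (uint32_t x, uint32_t a, uint32_t b)
    {
      return x + (a < b);
    }

    /* Conditional decrement: the same flag consumed by "sbb $0, x".  */
    static uint32_t
    cond_dec (uint32_t x, uint32_t a, uint32_t b)
    {
      return x - (a < b);
    }

    int
    main (void)
    {
      assert (cond_inc (10, 1, 2) == 11 && cond_inc (10, 2, 1) == 10);
      assert (cond_dec (10, 1, 2) == 9 && cond_dec (10, 2, 1) == 10);
      return 0;
    }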
*/ + +static int +ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) +{ + int size; + + if (!TARGET_64BIT) + size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; + else + size = (GET_MODE_SIZE (mode) + 4) / 8; + + gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand))); + gcc_assert (size >= 2 && size <= 3); + + /* Optimize constant pool reference to immediates. This is used by fp + moves, that force all constants to memory to allow combining. */ + if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand)) + { + rtx tmp = maybe_get_pool_constant (operand); + if (tmp) + operand = tmp; + } + + if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand)) + { + /* The only non-offsetable memories we handle are pushes. */ + int ok = push_operand (operand, VOIDmode); + + gcc_assert (ok); + + operand = copy_rtx (operand); + PUT_MODE (operand, Pmode); + parts[0] = parts[1] = parts[2] = operand; + return size; + } + + if (GET_CODE (operand) == CONST_VECTOR) + { + enum machine_mode imode = int_mode_for_mode (mode); + /* Caution: if we looked through a constant pool memory above, + the operand may actually have a different mode now. That's + ok, since we want to pun this all the way back to an integer. */ + operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); + gcc_assert (operand != NULL); + mode = imode; + } + + if (!TARGET_64BIT) + { + if (mode == DImode) + split_di (&operand, 1, &parts[0], &parts[1]); + else + { + if (REG_P (operand)) + { + gcc_assert (reload_completed); + parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0); + parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1); + if (size == 3) + parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2); + } + else if (offsettable_memref_p (operand)) + { + operand = adjust_address (operand, SImode, 0); + parts[0] = operand; + parts[1] = adjust_address (operand, SImode, 4); + if (size == 3) + parts[2] = adjust_address (operand, SImode, 8); + } + else if (GET_CODE (operand) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + long l[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operand); + switch (mode) + { + case XFmode: + REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); + parts[2] = gen_int_mode (l[2], SImode); + break; + case DFmode: + REAL_VALUE_TO_TARGET_DOUBLE (r, l); + break; + default: + gcc_unreachable (); + } + parts[1] = gen_int_mode (l[1], SImode); + parts[0] = gen_int_mode (l[0], SImode); + } + else + gcc_unreachable (); + } + } + else + { + if (mode == TImode) + split_ti (&operand, 1, &parts[0], &parts[1]); + if (mode == XFmode || mode == TFmode) + { + enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; + if (REG_P (operand)) + { + gcc_assert (reload_completed); + parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); + parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); + } + else if (offsettable_memref_p (operand)) + { + operand = adjust_address (operand, DImode, 0); + parts[0] = operand; + parts[1] = adjust_address (operand, upper_mode, 8); + } + else if (GET_CODE (operand) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + long l[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operand); + real_to_target (l, &r, mode); + + /* Do not use shift by 32 to avoid warning on 32bit systems. 
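The "shift by 32" remark that closes the comment above is a C portability point worth spelling out: shifting a value by at least its width is undefined behavior, and on hosts where HOST_WIDE_INT is 32 bits the guarded 64-bit branch below still gets compiled, so a literal << 32 would at minimum draw a warning. Writing (x << 31) << 1 is well defined whenever the operand type is wide enough, and equals x << 32 on 64-bit hosts. A standalone illustration (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint64_t hi = 0xdeadbeefu;
      /* Equivalent to hi << 32, but never shifts by a full 32 in one
         step, which would be undefined for a 32-bit operand.  */
      uint64_t v = (hi << 31) << 1;
      printf ("%016llx\n", (unsigned long long) v); /* deadbeef00000000 */
      return 0;
    }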
*/ + if (HOST_BITS_PER_WIDE_INT >= 64) + parts[0] + = gen_int_mode + ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) + + ((((HOST_WIDE_INT) l[1]) << 31) << 1), + DImode); + else + parts[0] = immed_double_const (l[0], l[1], DImode); + + if (upper_mode == SImode) + parts[1] = gen_int_mode (l[2], SImode); + else if (HOST_BITS_PER_WIDE_INT >= 64) + parts[1] + = gen_int_mode + ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1)) + + ((((HOST_WIDE_INT) l[3]) << 31) << 1), + DImode); + else + parts[1] = immed_double_const (l[2], l[3], DImode); + } + else + gcc_unreachable (); + } + } + + return size; +} + +/* Emit insns to perform a move or push of DI, DF, and XF values. + Operands 2-4 contain the input values in the correct order; + operands 5-7 contain the output values. */ + +void +ix86_split_long_move (rtx operands[]) +{ + rtx part[2][3]; + int nparts; + int push = 0; + int collisions = 0; + enum machine_mode mode = GET_MODE (operands[0]); + + /* The DFmode expanders may ask us to move a double. + For a 64-bit target this is a single move. By hiding the fact + here we simplify i386.md splitters. */ + if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) + { + /* Optimize constant pool reference to immediates. This is used by + fp moves, that force all constants to memory to allow combining. */ + + if (GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) + operands[1] = get_pool_constant (XEXP (operands[1], 0)); + if (push_operand (operands[0], VOIDmode)) + { + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], Pmode); + } + else + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); + emit_move_insn (operands[0], operands[1]); + return; + } + + /* The only non-offsettable memory we handle is push. */ + if (push_operand (operands[0], VOIDmode)) + push = 1; + else + gcc_assert (GET_CODE (operands[0]) != MEM + || offsettable_memref_p (operands[0])); + + nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); + ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); + + /* When emitting push, take care of source operands on the stack. */ + if (push && GET_CODE (operands[1]) == MEM + && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) + { + /* APPLE LOCAL begin 4099768 */ + if (nparts == 3 && TARGET_128BIT_LONG_DOUBLE && mode == XFmode) + part[1][2] = adjust_address (part[1][2], SImode, 4); + /* APPLE LOCAL end 4099768 */ + if (nparts == 3) + part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]), + XEXP (part[1][2], 0)); + part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]), + XEXP (part[1][1], 0)); + } + + /* We need to do the copy in the right order in case an address register + of the source overlaps the destination. */ + if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM) + { + if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))) + collisions++; + if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) + collisions++; + if (nparts == 3 + && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0))) + collisions++; + + /* Collision in the middle part can be handled by reordering.
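To see concretely what the CONST_DOUBLE handling above computes, here is the same word split done with memcpy on a little-endian host: an IEEE double reinterpreted as two SImode words, exactly the parts[0]/parts[1] pair the splitter builds. Illustrative only, not part of the patch.

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int
    main (void)
    {
      double d = 1.5;                /* 0x3ff8000000000000 in IEEE 754 */
      uint32_t w[2];
      memcpy (w, &d, sizeof w);
      /* On x86 prints parts[0]=0x00000000 parts[1]=0x3ff80000.  */
      printf ("parts[0]=0x%08x parts[1]=0x%08x\n",
              (unsigned) w[0], (unsigned) w[1]);
      return 0;
    }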
*/ + if (collisions == 1 && nparts == 3 + && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) + { + rtx tmp; + tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; + tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; + } + + /* If there are more collisions, we can't handle it by reordering. + Do an lea to the last part and use only one colliding move. */ + else if (collisions > 1) + { + rtx base; + + collisions = 1; + + base = part[0][nparts - 1]; + + /* Handle the case when the last part isn't valid for lea. + Happens in 64-bit mode storing the 12-byte XFmode. */ + if (GET_MODE (base) != Pmode) + base = gen_rtx_REG (Pmode, REGNO (base)); + + emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); + part[1][0] = replace_equiv_address (part[1][0], base); + part[1][1] = replace_equiv_address (part[1][1], + plus_constant (base, UNITS_PER_WORD)); + if (nparts == 3) + part[1][2] = replace_equiv_address (part[1][2], + plus_constant (base, 8)); + } + } + + if (push) + { + if (!TARGET_64BIT) + { + if (nparts == 3) + { + if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4))); + emit_move_insn (part[0][2], part[1][2]); + } + } + else + { + /* In 64bit mode we don't have 32bit push available. In case this is + register, it is OK - we will just use larger counterpart. We also + retype memory - these comes from attempt to avoid REX prefix on + moving of second half of TFmode value. */ + if (GET_MODE (part[1][1]) == SImode) + { + switch (GET_CODE (part[1][1])) + { + case MEM: + part[1][1] = adjust_address (part[1][1], DImode, 0); + break; + + case REG: + part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); + break; + + default: + gcc_unreachable (); + } + + if (GET_MODE (part[1][0]) == SImode) + part[1][0] = part[1][1]; + } + } + emit_move_insn (part[0][1], part[1][1]); + emit_move_insn (part[0][0], part[1][0]); + return; + } + + /* Choose correct order to not overwrite the source before it is copied. */ + if ((REG_P (part[0][0]) + && REG_P (part[1][1]) + && (REGNO (part[0][0]) == REGNO (part[1][1]) + || (nparts == 3 + && REGNO (part[0][0]) == REGNO (part[1][2])))) + || (collisions > 0 + && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) + { + if (nparts == 3) + { + operands[2] = part[0][2]; + operands[3] = part[0][1]; + operands[4] = part[0][0]; + operands[5] = part[1][2]; + operands[6] = part[1][1]; + operands[7] = part[1][0]; + } + else + { + operands[2] = part[0][1]; + operands[3] = part[0][0]; + operands[5] = part[1][1]; + operands[6] = part[1][0]; + } + } + else + { + if (nparts == 3) + { + operands[2] = part[0][0]; + operands[3] = part[0][1]; + operands[4] = part[0][2]; + operands[5] = part[1][0]; + operands[6] = part[1][1]; + operands[7] = part[1][2]; + } + else + { + operands[2] = part[0][0]; + operands[3] = part[0][1]; + operands[5] = part[1][0]; + operands[6] = part[1][1]; + } + } + + /* If optimizing for size, attempt to locally unCSE nonzero constants. 
*/ + if (optimize_size) + { + if (GET_CODE (operands[5]) == CONST_INT + && operands[5] != const0_rtx + && REG_P (operands[2])) + { + if (GET_CODE (operands[6]) == CONST_INT + && INTVAL (operands[6]) == INTVAL (operands[5])) + operands[6] = operands[2]; + + if (nparts == 3 + && GET_CODE (operands[7]) == CONST_INT + && INTVAL (operands[7]) == INTVAL (operands[5])) + operands[7] = operands[2]; + } + + if (nparts == 3 + && GET_CODE (operands[6]) == CONST_INT + && operands[6] != const0_rtx + && REG_P (operands[3]) + && GET_CODE (operands[7]) == CONST_INT + && INTVAL (operands[7]) == INTVAL (operands[6])) + operands[7] = operands[3]; + } + + emit_move_insn (operands[2], operands[5]); + emit_move_insn (operands[3], operands[6]); + if (nparts == 3) + emit_move_insn (operands[4], operands[7]); + + return; +} + +/* Helper function of ix86_split_ashl used to generate an SImode/DImode + left shift by a constant, either using a single shift or + a sequence of add instructions. */ + +static void +ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) +{ + if (count == 1) + { + emit_insn ((mode == DImode + ? gen_addsi3 + : gen_adddi3) (operand, operand, operand)); + } + else if (!optimize_size + && count * ix86_cost->add <= ix86_cost->shift_const) + { + int i; + for (i=0; i<count; i++) + { + emit_insn ((mode == DImode + ? gen_addsi3 + : gen_adddi3) (operand, operand, operand)); + } + } + else + emit_insn ((mode == DImode + ? gen_ashlsi3 + : gen_ashldi3) (operand, operand, GEN_INT (count))); +} + +void +ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) +{ + rtx low[2], high[2]; + int count; + const int single_width = mode == DImode ? 32 : 64; + + if (GET_CODE (operands[2]) == CONST_INT) + { + (mode == DImode ? split_di : split_ti) (operands, 2, low, high); + count = INTVAL (operands[2]) & (single_width * 2 - 1); + + if (count >= single_width) + { + emit_move_insn (high[0], low[1]); + emit_move_insn (low[0], const0_rtx); + + if (count > single_width) + ix86_expand_ashl_const (high[0], count - single_width, mode); + } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + emit_insn ((mode == DImode + ? gen_x86_shld_1 + : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); + ix86_expand_ashl_const (low[0], count, mode); + } + return; + } + + (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + + if (operands[1] == const1_rtx) + { + /* Assuming we've chosen a QImode capable registers, then 1 << N + can be done with two 32/64-bit shifts, no branches, no cmoves. */ + if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) + { + rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); + + ix86_expand_clear (low[0]); + ix86_expand_clear (high[0]); + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); + + d = gen_lowpart (QImode, low[0]); + d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); + s = gen_rtx_EQ (QImode, flags, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, d, s)); + + d = gen_lowpart (QImode, high[0]); + d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); + s = gen_rtx_NE (QImode, flags, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, d, s)); + } + + /* Otherwise, we can get the same results by manually performing + a bit extract operation on bit 5/6, and then performing the two + shifts. The two methods of getting 0/1 into low/high are exactly + the same size. Avoiding the shift in the bit extract case helps + pentium4 a bit; no one else seems to care much either way. 
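The constant-count case of ix86_split_ashl above is the easy one to model: a 64-bit shift on a 32-bit target becomes a move plus one 32-bit shift when the count is 32 or more, and an shld/shift pair otherwise. A scalar model, checked against a real 64-bit shift (illustrative, not part of the patch):

    #include <stdint.h>
    #include <assert.h>

    static void
    ashl64 (uint32_t *hi, uint32_t *lo, unsigned count) /* count < 64 */
    {
      if (count >= 32)
        {
          *hi = *lo << (count - 32);  /* mov low->high, clear low */
          *lo = 0;
        }
      else if (count > 0)
        {
          /* shld: high gets its own bits shifted, topped up from low.  */
          *hi = (*hi << count) | (*lo >> (32 - count));
          *lo <<= count;
        }
    }

    int
    main (void)
    {
      unsigned count;
      for (count = 0; count < 64; count++)
        {
          uint64_t v = 0x0123456789abcdefull;
          uint32_t lo = (uint32_t) v, hi = (uint32_t) (v >> 32);
          ashl64 (&hi, &lo, count);
          assert ((((uint64_t) hi << 32) | lo) == v << count);
        }
      return 0;
    }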
*/ + else + { + rtx x; + + if (TARGET_PARTIAL_REG_STALL && !optimize_size) + x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); + else + x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); + emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); + + emit_insn ((mode == DImode + ? gen_lshrsi3 + : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); + emit_insn ((mode == DImode + ? gen_andsi3 + : gen_anddi3) (high[0], high[0], GEN_INT (1))); + emit_move_insn (low[0], high[0]); + emit_insn ((mode == DImode + ? gen_xorsi3 + : gen_xordi3) (low[0], low[0], GEN_INT (1))); + } + + emit_insn ((mode == DImode + ? gen_ashlsi3 + : gen_ashldi3) (low[0], low[0], operands[2])); + emit_insn ((mode == DImode + ? gen_ashlsi3 + : gen_ashldi3) (high[0], high[0], operands[2])); + return; + } + + if (operands[1] == constm1_rtx) + { + /* For -1 << N, we can avoid the shld instruction, because we + know that we're shifting 0...31/63 ones into a -1. */ + emit_move_insn (low[0], constm1_rtx); + if (optimize_size) + emit_move_insn (high[0], low[0]); + else + emit_move_insn (high[0], constm1_rtx); + } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + emit_insn ((mode == DImode + ? gen_x86_shld_1 + : gen_x86_64_shld) (high[0], low[0], operands[2])); + } + + emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); + + if (TARGET_CMOVE && scratch) + { + ix86_expand_clear (scratch); + emit_insn ((mode == DImode + ? gen_x86_shift_adj_1 + : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch)); + } + else + emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); +} + +void +ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) +{ + rtx low[2], high[2]; + int count; + const int single_width = mode == DImode ? 32 : 64; + + if (GET_CODE (operands[2]) == CONST_INT) + { + (mode == DImode ? split_di : split_ti) (operands, 2, low, high); + count = INTVAL (operands[2]) & (single_width * 2 - 1); + + if (count == single_width * 2 - 1) + { + emit_move_insn (high[0], high[1]); + emit_insn ((mode == DImode + ? gen_ashrsi3 + : gen_ashrdi3) (high[0], high[0], + GEN_INT (single_width - 1))); + emit_move_insn (low[0], high[0]); + + } + else if (count >= single_width) + { + emit_move_insn (low[0], high[1]); + emit_move_insn (high[0], low[0]); + emit_insn ((mode == DImode + ? gen_ashrsi3 + : gen_ashrdi3) (high[0], high[0], + GEN_INT (single_width - 1))); + if (count > single_width) + emit_insn ((mode == DImode + ? gen_ashrsi3 + : gen_ashrdi3) (low[0], low[0], + GEN_INT (count - single_width))); + } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + emit_insn ((mode == DImode + ? gen_x86_shrd_1 + : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); + emit_insn ((mode == DImode + ? gen_ashrsi3 + : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); + } + } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + + emit_insn ((mode == DImode + ? gen_x86_shrd_1 + : gen_x86_64_shrd) (low[0], high[0], operands[2])); + emit_insn ((mode == DImode + ? gen_ashrsi3 + : gen_ashrdi3) (high[0], high[0], operands[2])); + + if (TARGET_CMOVE && scratch) + { + emit_move_insn (scratch, high[0]); + emit_insn ((mode == DImode + ? 
gen_ashrsi3 + : gen_ashrdi3) (scratch, scratch, + GEN_INT (single_width - 1))); + emit_insn ((mode == DImode + ? gen_x86_shift_adj_1 + : gen_x86_64_shift_adj) (low[0], high[0], operands[2], + scratch)); + } + else + emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); + } +} + +void +ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) +{ + rtx low[2], high[2]; + int count; + const int single_width = mode == DImode ? 32 : 64; + + if (GET_CODE (operands[2]) == CONST_INT) + { + (mode == DImode ? split_di : split_ti) (operands, 2, low, high); + count = INTVAL (operands[2]) & (single_width * 2 - 1); + + if (count >= single_width) + { + emit_move_insn (low[0], high[1]); + ix86_expand_clear (high[0]); + + if (count > single_width) + emit_insn ((mode == DImode + ? gen_lshrsi3 + : gen_lshrdi3) (low[0], low[0], + GEN_INT (count - single_width))); + } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + emit_insn ((mode == DImode + ? gen_x86_shrd_1 + : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); + emit_insn ((mode == DImode + ? gen_lshrsi3 + : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); + } + } + else + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + + emit_insn ((mode == DImode + ? gen_x86_shrd_1 + : gen_x86_64_shrd) (low[0], high[0], operands[2])); + emit_insn ((mode == DImode + ? gen_lshrsi3 + : gen_lshrdi3) (high[0], high[0], operands[2])); + + /* Heh. By reversing the arguments, we can reuse this pattern. */ + if (TARGET_CMOVE && scratch) + { + ix86_expand_clear (scratch); + emit_insn ((mode == DImode + ? gen_x86_shift_adj_1 + : gen_x86_64_shift_adj) (low[0], high[0], operands[2], + scratch)); + } + else + emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); + } +} + +/* Helper function for the string operations below. Dest VARIABLE whether + it is aligned to VALUE bytes. If true, jump to the label. */ +static rtx +ix86_expand_aligntest (rtx variable, int value) +{ + rtx label = gen_label_rtx (); + rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); + if (GET_MODE (variable) == DImode) + emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); + else + emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); + emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), + 1, label); + return label; +} + +/* Adjust COUNTER by the VALUE. */ +static void +ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) +{ + if (GET_MODE (countreg) == DImode) + emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); + else + emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); +} + +/* Zero extend possibly SImode EXP to Pmode register. */ +rtx +ix86_zero_extend_to_Pmode (rtx exp) +{ + rtx r; + if (GET_MODE (exp) == VOIDmode) + return force_reg (Pmode, exp); + if (GET_MODE (exp) == Pmode) + return copy_to_mode_reg (Pmode, exp); + r = gen_reg_rtx (Pmode); + emit_insn (gen_zero_extendsidi2 (r, exp)); + return r; +} + +/* Expand string move (memcpy) operation. Use i386 string operations when + profitable. expand_clrmem contains similar code. 
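The mop-up helpers above (ix86_expand_aligntest, ix86_adjust_counter) exist to support the strategy the next two expanders share: align the destination a byte or two at a time, bulk-copy with word-sized string operations, then finish the tail. The same control flow in portable C, with memcpy standing in for rep movsl (illustrative, not part of the patch):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void
    copy_like_movmem (unsigned char *d, const unsigned char *s, size_t n)
    {
      if (n && ((uintptr_t) d & 1))               /* align to 2 */
        { *d++ = *s++; n--; }
      if (n >= 2 && ((uintptr_t) d & 2))          /* align to 4 */
        { memcpy (d, s, 2); d += 2; s += 2; n -= 2; }
      while (n >= 4)                              /* "rep movsl" */
        { memcpy (d, s, 4); d += 4; s += 4; n -= 4; }
      while (n--)                                 /* tail bytes */
        *d++ = *s++;
    }

    int
    main (void)
    {
      unsigned char src[13] = "hello, world", dst[13];
      copy_like_movmem (dst, src, sizeof src);
      return memcmp (dst, src, sizeof src) != 0;
    }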
*/ +int +ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) +{ + rtx srcreg, destreg, countreg, srcexp, destexp; + enum machine_mode counter_mode; + HOST_WIDE_INT align = 0; + unsigned HOST_WIDE_INT count = 0; + + if (GET_CODE (align_exp) == CONST_INT) + align = INTVAL (align_exp); + + /* Can't use any of this if the user has appropriated esi or edi. */ + if (global_regs[4] || global_regs[5]) + return 0; + + /* This simple hack avoids all inlining code and simplifies code below. */ + if (!TARGET_ALIGN_STRINGOPS) + align = 64; + + if (GET_CODE (count_exp) == CONST_INT) + { + count = INTVAL (count_exp); + if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) + return 0; + } + + /* Figure out proper mode for counter. For 32bits it is always SImode, + for 64bits use SImode when possible, otherwise DImode. + Set count to number of bytes copied when known at compile time. */ + if (!TARGET_64BIT + || GET_MODE (count_exp) == SImode + || x86_64_zext_immediate_operand (count_exp, VOIDmode)) + counter_mode = SImode; + else + counter_mode = DImode; + + gcc_assert (counter_mode == SImode || counter_mode == DImode); + + destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + if (destreg != XEXP (dst, 0)) + dst = replace_equiv_address_nv (dst, destreg); + srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); + if (srcreg != XEXP (src, 0)) + src = replace_equiv_address_nv (src, srcreg); + + /* When optimizing for size, emit a simple rep ; movsb instruction for + counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)? + sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb. + Size of (movsl;)*(movsw;)?(movsb;)? sequence is + count / 4 + (count & 3), the other sequence is either 4 or 7 bytes, + but we don't know whether upper 24 (resp. 56) bits of %ecx will be + known to be zero or not. The rep; movsb sequence causes higher + register pressure though, so take that into account. */ + + if ((!optimize || optimize_size) + && (count == 0 + || ((count & 0x03) + && (!optimize_size + || count > 5 * 4 + || (count & 3) + count / 4 > 6)))) + { + emit_insn (gen_cld ()); + countreg = ix86_zero_extend_to_Pmode (count_exp); + destexp = gen_rtx_PLUS (Pmode, destreg, countreg); + srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg); + emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg, + destexp, srcexp)); + } + + /* For constant aligned (or small unaligned) copies use rep movsl + followed by code copying the rest. For PentiumPro ensure 8 byte + alignment to allow rep movsl acceleration. */ + + else if (count != 0 + && (align >= 8 + || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) + || optimize_size || count < (unsigned int) 64)) + { + unsigned HOST_WIDE_INT offset = 0; + int size = TARGET_64BIT && !optimize_size ? 8 : 4; + rtx srcmem, dstmem; + + emit_insn (gen_cld ()); + if (count & ~(size - 1)) + { + if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4) + { + enum machine_mode movs_mode = size == 4 ? SImode : DImode; + + while (offset < (count & ~(size - 1))) + { + srcmem = adjust_automodify_address_nv (src, movs_mode, + srcreg, offset); + dstmem = adjust_automodify_address_nv (dst, movs_mode, + destreg, offset); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + offset += size; + } + } + else + { + countreg = GEN_INT ((count >> (size == 4 ? 2 : 3)) + & (TARGET_64BIT ? -1 : 0x3fffffff)); + countreg = copy_to_mode_reg (counter_mode, countreg); + countreg = ix86_zero_extend_to_Pmode (countreg); + + destexp = gen_rtx_ASHIFT (Pmode, countreg, + GEN_INT (size == 4 ? 
2 : 3)); + srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); + destexp = gen_rtx_PLUS (Pmode, destexp, destreg); + + emit_insn (gen_rep_mov (destreg, dst, srcreg, src, + countreg, destexp, srcexp)); + offset = count & ~(size - 1); + } + } + if (size == 8 && (count & 0x04)) + { + srcmem = adjust_automodify_address_nv (src, SImode, srcreg, + offset); + dstmem = adjust_automodify_address_nv (dst, SImode, destreg, + offset); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + offset += 4; + } + if (count & 0x02) + { + srcmem = adjust_automodify_address_nv (src, HImode, srcreg, + offset); + dstmem = adjust_automodify_address_nv (dst, HImode, destreg, + offset); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + offset += 2; + } + if (count & 0x01) + { + srcmem = adjust_automodify_address_nv (src, QImode, srcreg, + offset); + dstmem = adjust_automodify_address_nv (dst, QImode, destreg, + offset); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + } + } + /* The generic code based on the glibc implementation: + - align destination to 4 bytes (8 byte alignment is used for PentiumPro + allowing accelerated copying there) + - copy the data using rep movsl + - copy the rest. */ + else + { + rtx countreg2; + rtx label = NULL; + rtx srcmem, dstmem; + int desired_alignment = (TARGET_PENTIUMPRO + && (count == 0 || count >= (unsigned int) 260) + ? 8 : UNITS_PER_WORD); + /* Get rid of MEM_OFFSETs, they won't be accurate. */ + dst = change_address (dst, BLKmode, destreg); + src = change_address (src, BLKmode, srcreg); + + /* In case we don't know anything about the alignment, default to + library version, since it is usually equally fast and result in + shorter code. + + Also emit call when we know that the count is large and call overhead + will not be important. */ + if (!TARGET_INLINE_ALL_STRINGOPS + && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) + return 0; + + if (TARGET_SINGLE_STRINGOP) + emit_insn (gen_cld ()); + + countreg2 = gen_reg_rtx (Pmode); + countreg = copy_to_mode_reg (counter_mode, count_exp); + + /* We don't use loops to align destination and to copy parts smaller + than 4 bytes, because gcc is able to optimize such code better (in + the case the destination or the count really is aligned, gcc is often + able to predict the branches) and also it is friendlier to the + hardware branch prediction. + + Using loops is beneficial for generic case, because we can + handle small counts using the loops. Many CPUs (such as Athlon) + have large REP prefix setup costs. + + This is quite costly. Maybe we can revisit this decision later or + add some customizability to this code. 
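The size arithmetic quoted near the top of this function (an inlined (movsl;)*(movsw;)?(movsb;)? sequence costs about count/4 + (count & 3) code bytes, against 7 bytes for mov $count,%ecx; rep movsb, or 4 when the upper bits of %ecx are known zero) is easy to tabulate. A throwaway check of the crossover, not part of the patch:

    #include <stdio.h>

    int
    main (void)
    {
      unsigned count;
      for (count = 1; count <= 32; count++)
        {
          unsigned inlined = count / 4 + (count & 3);
          printf ("count=%2u inlined=%2u rep=7 -> %s\n", count, inlined,
                  inlined <= 7 ? "inline" : "rep movsb");
        }
      return 0;
    }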
*/ + + if (count == 0 && align < desired_alignment) + { + label = gen_label_rtx (); + emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), + LEU, 0, counter_mode, 1, label); + } + if (align <= 1) + { + rtx label = ix86_expand_aligntest (destreg, 1); + srcmem = change_address (src, QImode, srcreg); + dstmem = change_address (dst, QImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + ix86_adjust_counter (countreg, 1); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align <= 2) + { + rtx label = ix86_expand_aligntest (destreg, 2); + srcmem = change_address (src, HImode, srcreg); + dstmem = change_address (dst, HImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + ix86_adjust_counter (countreg, 2); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align <= 4 && desired_alignment > 4) + { + rtx label = ix86_expand_aligntest (destreg, 4); + srcmem = change_address (src, SImode, srcreg); + dstmem = change_address (dst, SImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + ix86_adjust_counter (countreg, 4); + emit_label (label); + LABEL_NUSES (label) = 1; + } + + if (label && desired_alignment > 4 && !TARGET_64BIT) + { + emit_label (label); + LABEL_NUSES (label) = 1; + label = NULL_RTX; + } + if (!TARGET_SINGLE_STRINGOP) + emit_insn (gen_cld ()); + if (TARGET_64BIT) + { + emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), + GEN_INT (3))); + destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); + } + else + { + emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); + destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); + } + srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); + destexp = gen_rtx_PLUS (Pmode, destexp, destreg); + emit_insn (gen_rep_mov (destreg, dst, srcreg, src, + countreg2, destexp, srcexp)); + + if (label) + { + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) + { + srcmem = change_address (src, SImode, srcreg); + dstmem = change_address (dst, SImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + } + if ((align <= 4 || count == 0) && TARGET_64BIT) + { + rtx label = ix86_expand_aligntest (countreg, 4); + srcmem = change_address (src, SImode, srcreg); + dstmem = change_address (dst, SImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align > 2 && count != 0 && (count & 2)) + { + srcmem = change_address (src, HImode, srcreg); + dstmem = change_address (dst, HImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + } + if (align <= 2 || count == 0) + { + rtx label = ix86_expand_aligntest (countreg, 2); + srcmem = change_address (src, HImode, srcreg); + dstmem = change_address (dst, HImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align > 1 && count != 0 && (count & 1)) + { + srcmem = change_address (src, QImode, srcreg); + dstmem = change_address (dst, QImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + } + if (align <= 1 || count == 0) + { + rtx label = ix86_expand_aligntest (countreg, 1); + srcmem = change_address (src, QImode, srcreg); + dstmem = change_address (dst, QImode, destreg); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + emit_label (label); + LABEL_NUSES (label) = 1; + } + } + + return 1; +} + +/* Expand 
string clear operation (bzero). Use i386 string operations when + profitable. expand_movmem contains similar code. */ +int +ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp) +{ + rtx destreg, zeroreg, countreg, destexp; + enum machine_mode counter_mode; + HOST_WIDE_INT align = 0; + unsigned HOST_WIDE_INT count = 0; + + if (GET_CODE (align_exp) == CONST_INT) + align = INTVAL (align_exp); + + /* Can't use any of this if the user has appropriated esi. */ + if (global_regs[4]) + return 0; + + /* This simple hack avoids all inlining code and simplifies code below. */ + if (!TARGET_ALIGN_STRINGOPS) + align = 32; + + if (GET_CODE (count_exp) == CONST_INT) + { + count = INTVAL (count_exp); + if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) + return 0; + } + /* Figure out proper mode for counter. For 32bits it is always SImode, + for 64bits use SImode when possible, otherwise DImode. + Set count to number of bytes copied when known at compile time. */ + if (!TARGET_64BIT + || GET_MODE (count_exp) == SImode + || x86_64_zext_immediate_operand (count_exp, VOIDmode)) + counter_mode = SImode; + else + counter_mode = DImode; + + destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + if (destreg != XEXP (dst, 0)) + dst = replace_equiv_address_nv (dst, destreg); + + + /* When optimizing for size emit simple rep ; movsb instruction for + counts not divisible by 4. The movl $N, %ecx; rep; stosb + sequence is 7 bytes long, so if optimizing for size and count is + small enough that some stosl, stosw and stosb instructions without + rep are shorter, fall back into the next if. */ + + if ((!optimize || optimize_size) + && (count == 0 + || ((count & 0x03) + && (!optimize_size || (count & 0x03) + (count >> 2) > 7)))) + { + emit_insn (gen_cld ()); + + countreg = ix86_zero_extend_to_Pmode (count_exp); + zeroreg = copy_to_mode_reg (QImode, const0_rtx); + destexp = gen_rtx_PLUS (Pmode, destreg, countreg); + emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp)); + } + else if (count != 0 + && (align >= 8 + || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) + || optimize_size || count < (unsigned int) 64)) + { + int size = TARGET_64BIT && !optimize_size ? 8 : 4; + unsigned HOST_WIDE_INT offset = 0; + + emit_insn (gen_cld ()); + + zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); + if (count & ~(size - 1)) + { + unsigned HOST_WIDE_INT repcount; + unsigned int max_nonrep; + + repcount = count >> (size == 4 ? 2 : 3); + if (!TARGET_64BIT) + repcount &= 0x3fffffff; + + /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes. + movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN + bytes. In both cases the latter seems to be faster for small + values of N. */ + max_nonrep = size == 4 ? 7 : 4; + if (!optimize_size) + switch (ix86_tune) + { + case PROCESSOR_PENTIUM4: + case PROCESSOR_NOCONA: + max_nonrep = 3; + break; + default: + break; + } + + if (repcount <= max_nonrep) + while (repcount-- > 0) + { + rtx mem = adjust_automodify_address_nv (dst, + GET_MODE (zeroreg), + destreg, offset); + emit_insn (gen_strset (destreg, mem, zeroreg)); + offset += size; + } + else + { + countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount)); + countreg = ix86_zero_extend_to_Pmode (countreg); + destexp = gen_rtx_ASHIFT (Pmode, countreg, + GEN_INT (size == 4 ? 
2 : 3)); + destexp = gen_rtx_PLUS (Pmode, destexp, destreg); + emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, + destexp)); + offset = count & ~(size - 1); + } + } + if (size == 8 && (count & 0x04)) + { + rtx mem = adjust_automodify_address_nv (dst, SImode, destreg, + offset); + emit_insn (gen_strset (destreg, mem, + gen_rtx_SUBREG (SImode, zeroreg, 0))); + offset += 4; + } + if (count & 0x02) + { + rtx mem = adjust_automodify_address_nv (dst, HImode, destreg, + offset); + emit_insn (gen_strset (destreg, mem, + gen_rtx_SUBREG (HImode, zeroreg, 0))); + offset += 2; + } + if (count & 0x01) + { + rtx mem = adjust_automodify_address_nv (dst, QImode, destreg, + offset); + emit_insn (gen_strset (destreg, mem, + gen_rtx_SUBREG (QImode, zeroreg, 0))); + } + } + else + { + rtx countreg2; + rtx label = NULL; + /* Compute desired alignment of the string operation. */ + int desired_alignment = (TARGET_PENTIUMPRO + && (count == 0 || count >= (unsigned int) 260) + ? 8 : UNITS_PER_WORD); + + /* In case we don't know anything about the alignment, default to + library version, since it is usually equally fast and result in + shorter code. + + Also emit call when we know that the count is large and call overhead + will not be important. */ + if (!TARGET_INLINE_ALL_STRINGOPS + && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) + return 0; + + if (TARGET_SINGLE_STRINGOP) + emit_insn (gen_cld ()); + + countreg2 = gen_reg_rtx (Pmode); + countreg = copy_to_mode_reg (counter_mode, count_exp); + zeroreg = copy_to_mode_reg (Pmode, const0_rtx); + /* Get rid of MEM_OFFSET, it won't be accurate. */ + dst = change_address (dst, BLKmode, destreg); + + if (count == 0 && align < desired_alignment) + { + label = gen_label_rtx (); + emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), + LEU, 0, counter_mode, 1, label); + } + if (align <= 1) + { + rtx label = ix86_expand_aligntest (destreg, 1); + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (QImode, zeroreg, 0))); + ix86_adjust_counter (countreg, 1); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align <= 2) + { + rtx label = ix86_expand_aligntest (destreg, 2); + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (HImode, zeroreg, 0))); + ix86_adjust_counter (countreg, 2); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align <= 4 && desired_alignment > 4) + { + rtx label = ix86_expand_aligntest (destreg, 4); + emit_insn (gen_strset (destreg, dst, + (TARGET_64BIT + ? 
gen_rtx_SUBREG (SImode, zeroreg, 0) + : zeroreg))); + ix86_adjust_counter (countreg, 4); + emit_label (label); + LABEL_NUSES (label) = 1; + } + + if (label && desired_alignment > 4 && !TARGET_64BIT) + { + emit_label (label); + LABEL_NUSES (label) = 1; + label = NULL_RTX; + } + + if (!TARGET_SINGLE_STRINGOP) + emit_insn (gen_cld ()); + if (TARGET_64BIT) + { + emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), + GEN_INT (3))); + destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); + } + else + { + emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); + destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); + } + destexp = gen_rtx_PLUS (Pmode, destexp, destreg); + emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp)); + + if (label) + { + emit_label (label); + LABEL_NUSES (label) = 1; + } + + if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (SImode, zeroreg, 0))); + if (TARGET_64BIT && (align <= 4 || count == 0)) + { + rtx label = ix86_expand_aligntest (countreg, 4); + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (SImode, zeroreg, 0))); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align > 2 && count != 0 && (count & 2)) + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (HImode, zeroreg, 0))); + if (align <= 2 || count == 0) + { + rtx label = ix86_expand_aligntest (countreg, 2); + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (HImode, zeroreg, 0))); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (align > 1 && count != 0 && (count & 1)) + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (QImode, zeroreg, 0))); + if (align <= 1 || count == 0) + { + rtx label = ix86_expand_aligntest (countreg, 1); + emit_insn (gen_strset (destreg, dst, + gen_rtx_SUBREG (QImode, zeroreg, 0))); + emit_label (label); + LABEL_NUSES (label) = 1; + } + } + return 1; +} + +/* Expand strlen. */ +int +ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) +{ + rtx addr, scratch1, scratch2, scratch3, scratch4; + + /* The generic case of strlen expander is long. Avoid it's + expanding unless TARGET_INLINE_ALL_STRINGOPS. */ + + if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 + && !TARGET_INLINE_ALL_STRINGOPS + && !optimize_size + && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) + return 0; + + addr = force_reg (Pmode, XEXP (src, 0)); + scratch1 = gen_reg_rtx (Pmode); + + if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 + && !optimize_size) + { + /* Well it seems that some optimizer does not combine a call like + foo(strlen(bar), strlen(bar)); + when the move and the subtraction is done here. It does calculate + the length just once when these instructions are done inside of + output_strlen_unroll(). But I think since &bar[strlen(bar)] is + often used and I use one fewer register for the lifetime of + output_strlen_unroll() this is better. */ + + emit_move_insn (out, addr); + + ix86_expand_strlensi_unroll_1 (out, src, align); + + /* strlensi_unroll_1 returns the address of the zero at the end of + the string, like memchr(), so compute the length by subtracting + the start address. 
*/ + if (TARGET_64BIT) + emit_insn (gen_subdi3 (out, out, addr)); + else + emit_insn (gen_subsi3 (out, out, addr)); + } + else + { + rtx unspec; + scratch2 = gen_reg_rtx (Pmode); + scratch3 = gen_reg_rtx (Pmode); + scratch4 = force_reg (Pmode, constm1_rtx); + + emit_move_insn (scratch3, addr); + eoschar = force_reg (QImode, eoschar); + + emit_insn (gen_cld ()); + src = replace_equiv_address_nv (src, scratch3); + + /* If .md starts supporting :P, this can be done in .md. */ + unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, + scratch4), UNSPEC_SCAS); + emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); + if (TARGET_64BIT) + { + emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); + emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); + } + else + { + emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); + emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); + } + } + return 1; +} + +/* Expand the appropriate insns for doing strlen if not just doing + repnz; scasb + + out = result, initialized with the start address + align_rtx = alignment of the address. + scratch = scratch register, initialized with the startaddress when + not aligned, otherwise undefined + + This is just the body. It needs the initializations mentioned above and + some address computing at the end. These things are done in i386.md. */ + +static void +ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) +{ + int align; + rtx tmp; + rtx align_2_label = NULL_RTX; + rtx align_3_label = NULL_RTX; + rtx align_4_label = gen_label_rtx (); + rtx end_0_label = gen_label_rtx (); + rtx mem; + rtx tmpreg = gen_reg_rtx (SImode); + rtx scratch = gen_reg_rtx (SImode); + rtx cmp; + + align = 0; + if (GET_CODE (align_rtx) == CONST_INT) + align = INTVAL (align_rtx); + + /* Loop to check 1..3 bytes for null to get an aligned pointer. */ + + /* Is there a known alignment and is it less than 4? */ + if (align < 4) + { + rtx scratch1 = gen_reg_rtx (Pmode); + emit_move_insn (scratch1, out); + /* Is there a known alignment and is it not 2? */ + if (align != 2) + { + align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ + align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ + + /* Leave just the 3 lower bits. */ + align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, + Pmode, 1, align_4_label); + emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, + Pmode, 1, align_2_label); + emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, + Pmode, 1, align_3_label); + } + else + { + /* Since the alignment is 2, we have to check 2 or 0 bytes; + check if is aligned to 4 - byte. */ + + align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, + NULL_RTX, 0, OPTAB_WIDEN); + + emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, + Pmode, 1, align_4_label); + } + + mem = change_address (src, QImode, out); + + /* Now compare the bytes. */ + + /* Compare the first n unaligned byte on a byte per byte basis. */ + emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, + QImode, 1, end_0_label); + + /* Increment the address. 
*/ + if (TARGET_64BIT) + emit_insn (gen_adddi3 (out, out, const1_rtx)); + else + emit_insn (gen_addsi3 (out, out, const1_rtx)); + + /* Not needed with an alignment of 2 */ + if (align != 2) + { + emit_label (align_2_label); + + emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, + end_0_label); + + if (TARGET_64BIT) + emit_insn (gen_adddi3 (out, out, const1_rtx)); + else + emit_insn (gen_addsi3 (out, out, const1_rtx)); + + emit_label (align_3_label); + } + + emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, + end_0_label); + + if (TARGET_64BIT) + emit_insn (gen_adddi3 (out, out, const1_rtx)); + else + emit_insn (gen_addsi3 (out, out, const1_rtx)); + } + + /* Generate loop to check 4 bytes at a time. It is not a good idea to + align this loop. It gives only huge programs, but does not help to + speed up. */ + emit_label (align_4_label); + + mem = change_address (src, SImode, out); + emit_move_insn (scratch, mem); + if (TARGET_64BIT) + emit_insn (gen_adddi3 (out, out, GEN_INT (4))); + else + emit_insn (gen_addsi3 (out, out, GEN_INT (4))); + + /* This formula yields a nonzero result iff one of the bytes is zero. + This saves three branches inside loop and many cycles. */ + + emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); + emit_insn (gen_one_cmplsi2 (scratch, scratch)); + emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); + emit_insn (gen_andsi3 (tmpreg, tmpreg, + gen_int_mode (0x80808080, SImode))); + emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, + align_4_label); + + if (TARGET_CMOVE) + { + rtx reg = gen_reg_rtx (SImode); + rtx reg2 = gen_reg_rtx (Pmode); + emit_move_insn (reg, tmpreg); + emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); + + /* If zero is not in the first two bytes, move two bytes forward. */ + emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); + tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmpreg, + gen_rtx_IF_THEN_ELSE (SImode, tmp, + reg, + tmpreg))); + /* Emit lea manually to avoid clobbering of flags. */ + emit_insn (gen_rtx_SET (SImode, reg2, + gen_rtx_PLUS (Pmode, out, const2_rtx))); + + tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, out, + gen_rtx_IF_THEN_ELSE (Pmode, tmp, + reg2, + out))); + + } + else + { + rtx end_2_label = gen_label_rtx (); + /* Is zero in the first two bytes? */ + + emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); + tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, end_2_label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = end_2_label; + + /* Not in the first two. Move two bytes forward. */ + emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); + if (TARGET_64BIT) + emit_insn (gen_adddi3 (out, out, const2_rtx)); + else + emit_insn (gen_addsi3 (out, out, const2_rtx)); + + emit_label (end_2_label); + + } + + /* Avoid branch in fixing the byte. 
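The add/complement/and sequence a few lines up is the classic word-at-a-time null scan, worth restating as a plain function: (v - 0x01010101) & ~v & 0x80808080 is nonzero exactly when some byte of v is zero, which is what lets the loop test four bytes per branch (the 0x8080 test afterwards then narrows the hit to the low or high halfword). Illustrative C, not part of the patch:

    #include <stdint.h>
    #include <assert.h>

    static int
    has_zero_byte (uint32_t v)
    {
      return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
    }

    int
    main (void)
    {
      unsigned i;
      assert (has_zero_byte (0u));
      assert (!has_zero_byte (0x11223344u));
      for (i = 0; i < 4; i++)   /* a zero in each byte position */
        assert (has_zero_byte (0xffffffffu ^ (0xffu << (8 * i))));
      return 0;
    }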
*/ + tmpreg = gen_lowpart (QImode, tmpreg); + emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); + cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx); + if (TARGET_64BIT) + emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp)); + else + emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp)); + + emit_label (end_0_label); +} + +void +ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, + rtx callarg2 ATTRIBUTE_UNUSED, + rtx pop, int sibcall) +{ + rtx use = NULL, call; + + if (pop == const0_rtx) + pop = NULL; + gcc_assert (!TARGET_64BIT || !pop); + + if (TARGET_MACHO && !TARGET_64BIT) + { +#if TARGET_MACHO + if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) + fnaddr = machopic_indirect_call_target (fnaddr); +#endif + } + else + { + /* Static functions and indirect calls don't need the pic register. */ + if (! TARGET_64BIT && flag_pic + && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF + && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) + use_reg (&use, pic_offset_table_rtx); + } + + if (TARGET_64BIT && INTVAL (callarg2) >= 0) + { + rtx al = gen_rtx_REG (QImode, 0); + emit_move_insn (al, callarg2); + use_reg (&use, al); + } + + if (! call_insn_operand (XEXP (fnaddr, 0), Pmode)) + { + fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); + fnaddr = gen_rtx_MEM (QImode, fnaddr); + } + if (sibcall && TARGET_64BIT + && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode)) + { + rtx addr; + addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); + fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */); + emit_move_insn (fnaddr, addr); + fnaddr = gen_rtx_MEM (QImode, fnaddr); + } + + call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + if (retval) + call = gen_rtx_SET (VOIDmode, retval, call); + if (pop) + { + pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); + pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); + call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); + } + + call = emit_call_insn (call); + if (use) + CALL_INSN_FUNCTION_USAGE (call) = use; +} + + +/* Clear stack slot assignments remembered from previous functions. + This is called from INIT_EXPANDERS once before RTL is emitted for each + function. */ + +static struct machine_function * +ix86_init_machine_status (void) +{ + struct machine_function *f; + + f = ggc_alloc_cleared (sizeof (struct machine_function)); + f->use_fast_prologue_epilogue_nregs = -1; + f->tls_descriptor_call_expanded_p = 0; + + return f; +} + +/* Return a MEM corresponding to a stack slot with mode MODE. + Allocate a new slot if necessary. + + The RTL for a function can have several slots available: N is + which slot to use. */ + +rtx +assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) +{ + struct stack_local_entry *s; + + gcc_assert (n < MAX_386_STACK_LOCALS); + + /* Virtual slot is valid only before vregs are instantiated. */ + gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); + + for (s = ix86_stack_locals; s; s = s->next) + if (s->mode == mode && s->n == n) + return s->rtl; + + s = (struct stack_local_entry *) + ggc_alloc (sizeof (struct stack_local_entry)); + s->n = n; + s->mode = mode; + s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); + + s->next = ix86_stack_locals; + ix86_stack_locals = s; + return s->rtl; +} + +/* Construct the SYMBOL_REF for the tls_get_addr function. */ + +static GTY(()) rtx ix86_tls_symbol; +rtx +ix86_tls_get_addr (void) +{ + + if (!ix86_tls_symbol) + { + ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, + (TARGET_ANY_GNU_TLS + && !TARGET_64BIT) + ? 
"___tls_get_addr" + : "__tls_get_addr"); + } + + return ix86_tls_symbol; +} + +/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ + +static GTY(()) rtx ix86_tls_module_base_symbol; +rtx +ix86_tls_module_base (void) +{ + + if (!ix86_tls_module_base_symbol) + { + ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, + "_TLS_MODULE_BASE_"); + SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) + |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return ix86_tls_module_base_symbol; +} + +/* Calculate the length of the memory address in the instruction + encoding. Does not include the one-byte modrm, opcode, or prefix. */ + +int +memory_address_length (rtx addr) +{ + struct ix86_address parts; + rtx base, index, disp; + int len; + int ok; + + if (GET_CODE (addr) == PRE_DEC + || GET_CODE (addr) == POST_INC + || GET_CODE (addr) == PRE_MODIFY + || GET_CODE (addr) == POST_MODIFY) + return 0; + + ok = ix86_decompose_address (addr, &parts); + gcc_assert (ok); + + if (parts.base && GET_CODE (parts.base) == SUBREG) + parts.base = SUBREG_REG (parts.base); + if (parts.index && GET_CODE (parts.index) == SUBREG) + parts.index = SUBREG_REG (parts.index); + + base = parts.base; + index = parts.index; + disp = parts.disp; + len = 0; + + /* Rule of thumb: + - esp as the base always wants an index, + - ebp as the base always wants a displacement. */ + + /* Register Indirect. */ + if (base && !index && !disp) + { + /* esp (for its index) and ebp (for its displacement) need + the two-byte modrm form. */ + if (addr == stack_pointer_rtx + || addr == arg_pointer_rtx + || addr == frame_pointer_rtx + || addr == hard_frame_pointer_rtx) + len = 1; + } + + /* Direct Addressing. */ + else if (disp && !base && !index) + len = 4; + + else + { + /* Find the length of the displacement constant. */ + if (disp) + { + if (base && satisfies_constraint_K (disp)) + len = 1; + else + len = 4; + } + /* ebp always wants a displacement. */ + else if (base == hard_frame_pointer_rtx) + len = 1; + + /* An index requires the two-byte modrm form.... */ + if (index + /* ...like esp, which always wants an index. */ + || base == stack_pointer_rtx + || base == arg_pointer_rtx + || base == frame_pointer_rtx) + len += 1; + } + + return len; +} + +/* Compute default value for "length_immediate" attribute. When SHORTFORM + is set, expect that insn have 8bit immediate alternative. */ +int +ix86_attr_length_immediate_default (rtx insn, int shortform) +{ + int len = 0; + int i; + extract_insn_cached (insn); + for (i = recog_data.n_operands - 1; i >= 0; --i) + if (CONSTANT_P (recog_data.operand[i])) + { + gcc_assert (!len); + if (shortform && satisfies_constraint_K (recog_data.operand[i])) + len = 1; + else + { + switch (get_attr_mode (insn)) + { + case MODE_QI: + len+=1; + break; + case MODE_HI: + len+=2; + break; + case MODE_SI: + len+=4; + break; + /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ + case MODE_DI: + len+=4; + break; + default: + fatal_insn ("unknown insn mode", insn); + } + } + } + return len; +} +/* Compute default value for "length_address" attribute. 
*/ +int +ix86_attr_length_address_default (rtx insn) +{ + int i; + + if (get_attr_type (insn) == TYPE_LEA) + { + rtx set = PATTERN (insn); + + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + + gcc_assert (GET_CODE (set) == SET); + + return memory_address_length (SET_SRC (set)); + } + + extract_insn_cached (insn); + for (i = recog_data.n_operands - 1; i >= 0; --i) + if (GET_CODE (recog_data.operand[i]) == MEM) + { + return memory_address_length (XEXP (recog_data.operand[i], 0)); + break; + } + return 0; +} + +/* Return the maximum number of instructions a CPU can issue. */ + +static int +ix86_issue_rate (void) +{ + switch (ix86_tune) + { + case PROCESSOR_PENTIUM: + case PROCESSOR_K6: + return 2; + + case PROCESSOR_PENTIUMPRO: + case PROCESSOR_PENTIUM4: + case PROCESSOR_ATHLON: + case PROCESSOR_K8: + case PROCESSOR_NOCONA: + case PROCESSOR_GENERIC32: + case PROCESSOR_GENERIC64: + return 3; + /* APPLE LOCAL begin mainline */ + case PROCESSOR_CORE2: + return 4; + /* APPLE LOCAL end mainline */ + + default: + return 1; + } +} + +/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set + by DEP_INSN and nothing else set by DEP_INSN. */ + +static int +ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) +{ + rtx set, set2; + + /* Simplify the test for uninteresting insns. */ + if (insn_type != TYPE_SETCC + && insn_type != TYPE_ICMOV + && insn_type != TYPE_FCMOV + && insn_type != TYPE_IBR) + return 0; + + if ((set = single_set (dep_insn)) != 0) + { + set = SET_DEST (set); + set2 = NULL_RTX; + } + else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL + && XVECLEN (PATTERN (dep_insn), 0) == 2 + && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET + && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) + { + set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); + set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1)); + } + else + return 0; + + if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) + return 0; + + /* This test is true if the dependent insn reads the flags but + not any other potentially set register. */ + if (!reg_overlap_mentioned_p (set, PATTERN (insn))) + return 0; + + if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) + return 0; + + return 1; +} + +/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory + address with operands set by DEP_INSN. */ + +static int +ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) +{ + rtx addr; + + if (insn_type == TYPE_LEA + && TARGET_PENTIUM) + { + addr = PATTERN (insn); + + if (GET_CODE (addr) == PARALLEL) + addr = XVECEXP (addr, 0, 0); + + gcc_assert (GET_CODE (addr) == SET); + + addr = SET_SRC (addr); + } + else + { + int i; + extract_insn_cached (insn); + for (i = recog_data.n_operands - 1; i >= 0; --i) + if (GET_CODE (recog_data.operand[i]) == MEM) + { + addr = XEXP (recog_data.operand[i], 0); + goto found; + } + return 0; + found:; + } + + return modified_in_p (addr, dep_insn); +} + +static int +ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type insn_type, dep_insn_type; + enum attr_memory memory; + rtx set, set2; + int dep_insn_code_number; + + /* Anti and output dependencies have zero cost on all CPUs. */ + if (REG_NOTE_KIND (link) != 0) + return 0; + + dep_insn_code_number = recog_memoized (dep_insn); + + /* If we can't recognize the insns, we can't really do anything. 
+     */
+  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+    return cost;
+
+  insn_type = get_attr_type (insn);
+  dep_insn_type = get_attr_type (dep_insn);
+
+  switch (ix86_tune)
+    {
+    case PROCESSOR_PENTIUM:
+      /* Address Generation Interlock adds a cycle of latency.  */
+      if (ix86_agi_dependent (insn, dep_insn, insn_type))
+	cost += 1;
+
+      /* ??? Compares pair with jump/setcc.  */
+      if (ix86_flags_dependent (insn, dep_insn, insn_type))
+	cost = 0;
+
+      /* Floating point stores require value to be ready one cycle earlier.  */
+      if (insn_type == TYPE_FMOV
+	  && get_attr_memory (insn) == MEMORY_STORE
+	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	cost += 1;
+      break;
+
+    case PROCESSOR_PENTIUMPRO:
+      memory = get_attr_memory (insn);
+
+      /* INT->FP conversion is expensive.  */
+      if (get_attr_fp_int_src (dep_insn))
+	cost += 5;
+
+      /* There is one cycle extra latency between an FP op and a store.  */
+      if (insn_type == TYPE_FMOV
+	  && (set = single_set (dep_insn)) != NULL_RTX
+	  && (set2 = single_set (insn)) != NULL_RTX
+	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
+	  && GET_CODE (SET_DEST (set2)) == MEM)
+	cost += 1;
+
+      /* The reorder buffer can hide the latency of a load by executing
+	 it in parallel with the previous instruction, provided the
+	 previous instruction is not needed to compute the address.  */
+      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	{
+	  /* Claim moves take one cycle, as the core can issue one load
+	     at a time and the next load can start a cycle later.  */
+	  if (dep_insn_type == TYPE_IMOV
+	      || dep_insn_type == TYPE_FMOV)
+	    cost = 1;
+	  else if (cost > 1)
+	    cost--;
+	}
+      break;
+
+    case PROCESSOR_K6:
+      memory = get_attr_memory (insn);
+
+      /* The esp dependency is resolved before the instruction is really
+	 finished.  */
+      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+	return 1;
+
+      /* INT->FP conversion is expensive.  */
+      if (get_attr_fp_int_src (dep_insn))
+	cost += 5;
+
+      /* The reorder buffer can hide the latency of a load by executing
+	 it in parallel with the previous instruction, provided the
+	 previous instruction is not needed to compute the address.  */
+      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	{
+	  /* Claim moves take one cycle, as the core can issue one load
+	     at a time and the next load can start a cycle later.  */
+	  if (dep_insn_type == TYPE_IMOV
+	      || dep_insn_type == TYPE_FMOV)
+	    cost = 1;
+	  else if (cost > 2)
+	    cost -= 2;
+	  else
+	    cost = 1;
+	}
+      break;
+
+    case PROCESSOR_ATHLON:
+    case PROCESSOR_K8:
+    case PROCESSOR_GENERIC32:
+    case PROCESSOR_GENERIC64:
+      memory = get_attr_memory (insn);
+
+      /* The reorder buffer can hide the latency of a load by executing
+	 it in parallel with the previous instruction, provided the
+	 previous instruction is not needed to compute the address.  */
+      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
+	{
+	  enum attr_unit unit = get_attr_unit (insn);
+	  int loadcost = 3;
+
+	  /* Because of the difference between the length of integer and
+	     floating unit pipeline preparation stages, the memory operands
+	     for floating point are cheaper.
+
+	     ??? For Athlon the difference is most probably 2.  */
+	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+	    loadcost = 3;
+	  else
+	    loadcost = TARGET_ATHLON ? 2 : 0;
+
+	  if (cost >= loadcost)
+	    cost -= loadcost;
+	  else
+	    cost = 0;
+	}
+      break;
+
+    default:
+      break;
+    }
+
+  return cost;
+}
+
+/* How many alternative schedules to try.  This should be as wide as the
+   scheduling freedom in the DFA, but no wider.  Making this value too
+   large results in extra work for the scheduler.  */
+
+static int
+ia32_multipass_dfa_lookahead (void)
+{
+  if (ix86_tune == PROCESSOR_PENTIUM)
+    return 2;
+
+  if (ix86_tune == PROCESSOR_PENTIUMPRO
+      || ix86_tune == PROCESSOR_K6)
+    return 1;
+
+  else
+    return 0;
+}
+
+
+/* Compute the alignment given to a constant that is being placed in memory.
+   EXP is the constant and ALIGN is the alignment that the object would
+   ordinarily have.
+   The value of this function is used instead of that alignment to align
+   the object.  */
+
+int
+ix86_constant_alignment (tree exp, int align)
+{
+  if (TREE_CODE (exp) == REAL_CST)
+    {
+      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
+	return 64;
+      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
+	return 128;
+    }
+  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
+	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
+    return BITS_PER_WORD;
+
+/* APPLE LOCAL begin 4090661 */
+#if TARGET_MACHO
+  /* Without this, static arrays initialized to strings get aligned
+     to 32 bytes.  These go in cstring, so would result in a lot of extra
+     padding in files with a couple of small strings.  4090661.  */
+  else if (TREE_CODE (exp) == STRING_CST)
+    {
+      if (TREE_STRING_LENGTH (exp) >= 31 && !optimize_size)
+	return BITS_PER_WORD;
+      else
+	return 8;
+    }
+#endif
+/* APPLE LOCAL end 4090661 */
+  return align;
+}
+
+/* Compute the alignment for a static variable.
+   TYPE is the data type, and ALIGN is the alignment that
+   the object would ordinarily have.  The value of this function is used
+   instead of that alignment to align the object.  */
+
+int
+ix86_data_alignment (tree type, int align)
+{
+  int max_align = optimize_size ? BITS_PER_WORD : 256;
+
+  if (AGGREGATE_TYPE_P (type)
+      && TYPE_SIZE (type)
+      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
+	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
+      && align < max_align)
+    align = max_align;
+
+  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
+     to a 16-byte boundary.
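+     TYPE_SIZE is in bits, so for example a static "int v[8]" -- 256
+     bits -- passes the >= 128 test below (when ALIGN is smaller) and
+     is given 128-bit alignment.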
+     */
+  if (TARGET_64BIT)
+    {
+      if (AGGREGATE_TYPE_P (type)
+	  && TYPE_SIZE (type)
+	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+	return 128;
+    }
+
+  if (TREE_CODE (type) == ARRAY_TYPE)
+    {
+      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+	return 64;
+      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+	return 128;
+    }
+  else if (TREE_CODE (type) == COMPLEX_TYPE)
+    {
+      if (TYPE_MODE (type) == DCmode && align < 64)
+	return 64;
+      if (TYPE_MODE (type) == XCmode && align < 128)
+	return 128;
+    }
+  else if ((TREE_CODE (type) == RECORD_TYPE
+	    || TREE_CODE (type) == UNION_TYPE
+	    || TREE_CODE (type) == QUAL_UNION_TYPE)
+	   && TYPE_FIELDS (type))
+    {
+      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+	return 64;
+      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+	return 128;
+    }
+  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+	   || TREE_CODE (type) == INTEGER_TYPE)
+    {
+      if (TYPE_MODE (type) == DFmode && align < 64)
+	return 64;
+      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+	return 128;
+    }
+
+  return align;
+}
+
+/* Compute the alignment for a local variable.
+   TYPE is the data type, and ALIGN is the alignment that
+   the object would ordinarily have.  The value of this macro is used
+   instead of that alignment to align the object.  */
+
+int
+ix86_local_alignment (tree type, int align)
+{
+  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
+     to a 16-byte boundary.  */
+  if (TARGET_64BIT)
+    {
+      if (AGGREGATE_TYPE_P (type)
+	  && TYPE_SIZE (type)
+	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
+	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+	return 128;
+    }
+  if (TREE_CODE (type) == ARRAY_TYPE)
+    {
+      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+	return 64;
+      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+	return 128;
+    }
+  else if (TREE_CODE (type) == COMPLEX_TYPE)
+    {
+      if (TYPE_MODE (type) == DCmode && align < 64)
+	return 64;
+      if (TYPE_MODE (type) == XCmode && align < 128)
+	return 128;
+    }
+  else if ((TREE_CODE (type) == RECORD_TYPE
+	    || TREE_CODE (type) == UNION_TYPE
+	    || TREE_CODE (type) == QUAL_UNION_TYPE)
+	   && TYPE_FIELDS (type))
+    {
+      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+	return 64;
+      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+	return 128;
+    }
+  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+	   || TREE_CODE (type) == INTEGER_TYPE)
+    {
+      if (TYPE_MODE (type) == DFmode && align < 64)
+	return 64;
+      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+	return 128;
+    }
+  return align;
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+   FNADDR is an RTX for the address of the function's pure code.
+   CXT is an RTX for the static chain value for the function.  */
+void
+x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
+{
+  if (!TARGET_64BIT)
+    {
+      /* Compute offset from the end of the jmp to the target function.
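+	 The 10 bytes emitted below are b9 <cxt:4> e9 <disp:4>, i.e.
+	 movl $cxt, %ecx; jmp rel32.  The rel32 displacement is taken
+	 relative to the end of the jmp, tramp + 10, which is why the
+	 subtraction uses plus_constant (tramp, 10).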
+	 */
+      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
+			       plus_constant (tramp, 10),
+			       NULL_RTX, 1, OPTAB_DIRECT);
+      emit_move_insn (gen_rtx_MEM (QImode, tramp),
+		      gen_int_mode (0xb9, QImode));
+      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
+      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
+		      gen_int_mode (0xe9, QImode));
+      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
+    }
+  else
+    {
+      int offset = 0;
+      /* Try to load address using shorter movl instead of movabs.
+	 We may want to support movq for kernel mode, but kernel does not
+	 use trampolines at the moment.  */
+      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
+	{
+	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
+	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+			  gen_int_mode (0xbb41, HImode));
+	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
+			  gen_lowpart (SImode, fnaddr));
+	  offset += 6;
+	}
+      else
+	{
+	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+			  gen_int_mode (0xbb49, HImode));
+	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+			  fnaddr);
+	  offset += 10;
+	}
+      /* Load static chain using movabs to r10.  */
+      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+		      gen_int_mode (0xba49, HImode));
+      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+		      cxt);
+      offset += 10;
+      /* Jump to r11.  */
+      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+		      gen_int_mode (0xff49, HImode));
+      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
+		      gen_int_mode (0xe3, QImode));
+      offset += 3;
+      gcc_assert (offset <= TRAMPOLINE_SIZE);
+    }
+
+#ifdef ENABLE_EXECUTE_STACK
+  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
+#endif
+}
+
+/* Codes for all the SSE/MMX builtins.
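+   User code normally reaches these through the *mmintrin.h wrappers;
+   e.g. _mm_add_ps compiles down to __builtin_ia32_addps, which the
+   tables further down tie to the addv4sf3 insn pattern via
+   IX86_BUILTIN_ADDPS.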
*/ +enum ix86_builtins +{ + IX86_BUILTIN_ADDPS, + IX86_BUILTIN_ADDSS, + IX86_BUILTIN_DIVPS, + IX86_BUILTIN_DIVSS, + IX86_BUILTIN_MULPS, + IX86_BUILTIN_MULSS, + IX86_BUILTIN_SUBPS, + IX86_BUILTIN_SUBSS, + + IX86_BUILTIN_CMPEQPS, + IX86_BUILTIN_CMPLTPS, + IX86_BUILTIN_CMPLEPS, + IX86_BUILTIN_CMPGTPS, + IX86_BUILTIN_CMPGEPS, + IX86_BUILTIN_CMPNEQPS, + IX86_BUILTIN_CMPNLTPS, + IX86_BUILTIN_CMPNLEPS, + IX86_BUILTIN_CMPNGTPS, + IX86_BUILTIN_CMPNGEPS, + IX86_BUILTIN_CMPORDPS, + IX86_BUILTIN_CMPUNORDPS, + IX86_BUILTIN_CMPEQSS, + IX86_BUILTIN_CMPLTSS, + IX86_BUILTIN_CMPLESS, + IX86_BUILTIN_CMPNEQSS, + IX86_BUILTIN_CMPNLTSS, + IX86_BUILTIN_CMPNLESS, + IX86_BUILTIN_CMPNGTSS, + IX86_BUILTIN_CMPNGESS, + IX86_BUILTIN_CMPORDSS, + IX86_BUILTIN_CMPUNORDSS, + + IX86_BUILTIN_COMIEQSS, + IX86_BUILTIN_COMILTSS, + IX86_BUILTIN_COMILESS, + IX86_BUILTIN_COMIGTSS, + IX86_BUILTIN_COMIGESS, + IX86_BUILTIN_COMINEQSS, + IX86_BUILTIN_UCOMIEQSS, + IX86_BUILTIN_UCOMILTSS, + IX86_BUILTIN_UCOMILESS, + IX86_BUILTIN_UCOMIGTSS, + IX86_BUILTIN_UCOMIGESS, + IX86_BUILTIN_UCOMINEQSS, + + IX86_BUILTIN_CVTPI2PS, + IX86_BUILTIN_CVTPS2PI, + IX86_BUILTIN_CVTSI2SS, + IX86_BUILTIN_CVTSI642SS, + IX86_BUILTIN_CVTSS2SI, + IX86_BUILTIN_CVTSS2SI64, + IX86_BUILTIN_CVTTPS2PI, + IX86_BUILTIN_CVTTSS2SI, + IX86_BUILTIN_CVTTSS2SI64, + + IX86_BUILTIN_MAXPS, + IX86_BUILTIN_MAXSS, + IX86_BUILTIN_MINPS, + IX86_BUILTIN_MINSS, + + IX86_BUILTIN_LOADUPS, + IX86_BUILTIN_STOREUPS, + IX86_BUILTIN_MOVSS, + + IX86_BUILTIN_MOVHLPS, + IX86_BUILTIN_MOVLHPS, + IX86_BUILTIN_LOADHPS, + IX86_BUILTIN_LOADLPS, + IX86_BUILTIN_STOREHPS, + IX86_BUILTIN_STORELPS, + + IX86_BUILTIN_MASKMOVQ, + IX86_BUILTIN_MOVMSKPS, + IX86_BUILTIN_PMOVMSKB, + + IX86_BUILTIN_MOVNTPS, + IX86_BUILTIN_MOVNTQ, + + IX86_BUILTIN_LOADDQU, + IX86_BUILTIN_STOREDQU, + + IX86_BUILTIN_PACKSSWB, + IX86_BUILTIN_PACKSSDW, + IX86_BUILTIN_PACKUSWB, + + IX86_BUILTIN_PADDB, + IX86_BUILTIN_PADDW, + IX86_BUILTIN_PADDD, + IX86_BUILTIN_PADDQ, + IX86_BUILTIN_PADDSB, + IX86_BUILTIN_PADDSW, + IX86_BUILTIN_PADDUSB, + IX86_BUILTIN_PADDUSW, + IX86_BUILTIN_PSUBB, + IX86_BUILTIN_PSUBW, + IX86_BUILTIN_PSUBD, + IX86_BUILTIN_PSUBQ, + IX86_BUILTIN_PSUBSB, + IX86_BUILTIN_PSUBSW, + IX86_BUILTIN_PSUBUSB, + IX86_BUILTIN_PSUBUSW, + + IX86_BUILTIN_PAND, + IX86_BUILTIN_PANDN, + IX86_BUILTIN_POR, + IX86_BUILTIN_PXOR, + + IX86_BUILTIN_PAVGB, + IX86_BUILTIN_PAVGW, + + IX86_BUILTIN_PCMPEQB, + IX86_BUILTIN_PCMPEQW, + IX86_BUILTIN_PCMPEQD, + IX86_BUILTIN_PCMPGTB, + IX86_BUILTIN_PCMPGTW, + IX86_BUILTIN_PCMPGTD, + + IX86_BUILTIN_PMADDWD, + + IX86_BUILTIN_PMAXSW, + IX86_BUILTIN_PMAXUB, + IX86_BUILTIN_PMINSW, + IX86_BUILTIN_PMINUB, + + IX86_BUILTIN_PMULHUW, + IX86_BUILTIN_PMULHW, + IX86_BUILTIN_PMULLW, + + IX86_BUILTIN_PSADBW, + IX86_BUILTIN_PSHUFW, + + IX86_BUILTIN_PSLLW, + IX86_BUILTIN_PSLLD, + IX86_BUILTIN_PSLLQ, + IX86_BUILTIN_PSRAW, + IX86_BUILTIN_PSRAD, + IX86_BUILTIN_PSRLW, + IX86_BUILTIN_PSRLD, + IX86_BUILTIN_PSRLQ, + IX86_BUILTIN_PSLLWI, + IX86_BUILTIN_PSLLDI, + IX86_BUILTIN_PSLLQI, + IX86_BUILTIN_PSRAWI, + IX86_BUILTIN_PSRADI, + IX86_BUILTIN_PSRLWI, + IX86_BUILTIN_PSRLDI, + IX86_BUILTIN_PSRLQI, + + IX86_BUILTIN_PUNPCKHBW, + IX86_BUILTIN_PUNPCKHWD, + IX86_BUILTIN_PUNPCKHDQ, + IX86_BUILTIN_PUNPCKLBW, + IX86_BUILTIN_PUNPCKLWD, + IX86_BUILTIN_PUNPCKLDQ, + + IX86_BUILTIN_SHUFPS, + + IX86_BUILTIN_RCPPS, + IX86_BUILTIN_RCPSS, + IX86_BUILTIN_RSQRTPS, + IX86_BUILTIN_RSQRTSS, + IX86_BUILTIN_SQRTPS, + IX86_BUILTIN_SQRTSS, + + IX86_BUILTIN_UNPCKHPS, + IX86_BUILTIN_UNPCKLPS, + + IX86_BUILTIN_ANDPS, + IX86_BUILTIN_ANDNPS, + IX86_BUILTIN_ORPS, + 
IX86_BUILTIN_XORPS, + + IX86_BUILTIN_EMMS, + IX86_BUILTIN_LDMXCSR, + IX86_BUILTIN_STMXCSR, + IX86_BUILTIN_SFENCE, + + /* 3DNow! Original */ + IX86_BUILTIN_FEMMS, + IX86_BUILTIN_PAVGUSB, + IX86_BUILTIN_PF2ID, + IX86_BUILTIN_PFACC, + IX86_BUILTIN_PFADD, + IX86_BUILTIN_PFCMPEQ, + IX86_BUILTIN_PFCMPGE, + IX86_BUILTIN_PFCMPGT, + IX86_BUILTIN_PFMAX, + IX86_BUILTIN_PFMIN, + IX86_BUILTIN_PFMUL, + IX86_BUILTIN_PFRCP, + IX86_BUILTIN_PFRCPIT1, + IX86_BUILTIN_PFRCPIT2, + IX86_BUILTIN_PFRSQIT1, + IX86_BUILTIN_PFRSQRT, + IX86_BUILTIN_PFSUB, + IX86_BUILTIN_PFSUBR, + IX86_BUILTIN_PI2FD, + IX86_BUILTIN_PMULHRW, + + /* 3DNow! Athlon Extensions */ + IX86_BUILTIN_PF2IW, + IX86_BUILTIN_PFNACC, + IX86_BUILTIN_PFPNACC, + IX86_BUILTIN_PI2FW, + IX86_BUILTIN_PSWAPDSI, + IX86_BUILTIN_PSWAPDSF, + + /* SSE2 */ + IX86_BUILTIN_ADDPD, + IX86_BUILTIN_ADDSD, + IX86_BUILTIN_DIVPD, + IX86_BUILTIN_DIVSD, + IX86_BUILTIN_MULPD, + IX86_BUILTIN_MULSD, + IX86_BUILTIN_SUBPD, + IX86_BUILTIN_SUBSD, + + IX86_BUILTIN_CMPEQPD, + IX86_BUILTIN_CMPLTPD, + IX86_BUILTIN_CMPLEPD, + IX86_BUILTIN_CMPGTPD, + IX86_BUILTIN_CMPGEPD, + IX86_BUILTIN_CMPNEQPD, + IX86_BUILTIN_CMPNLTPD, + IX86_BUILTIN_CMPNLEPD, + IX86_BUILTIN_CMPNGTPD, + IX86_BUILTIN_CMPNGEPD, + IX86_BUILTIN_CMPORDPD, + IX86_BUILTIN_CMPUNORDPD, + IX86_BUILTIN_CMPNEPD, + IX86_BUILTIN_CMPEQSD, + IX86_BUILTIN_CMPLTSD, + IX86_BUILTIN_CMPLESD, + IX86_BUILTIN_CMPNEQSD, + IX86_BUILTIN_CMPNLTSD, + IX86_BUILTIN_CMPNLESD, + IX86_BUILTIN_CMPORDSD, + IX86_BUILTIN_CMPUNORDSD, + IX86_BUILTIN_CMPNESD, + + IX86_BUILTIN_COMIEQSD, + IX86_BUILTIN_COMILTSD, + IX86_BUILTIN_COMILESD, + IX86_BUILTIN_COMIGTSD, + IX86_BUILTIN_COMIGESD, + IX86_BUILTIN_COMINEQSD, + IX86_BUILTIN_UCOMIEQSD, + IX86_BUILTIN_UCOMILTSD, + IX86_BUILTIN_UCOMILESD, + IX86_BUILTIN_UCOMIGTSD, + IX86_BUILTIN_UCOMIGESD, + IX86_BUILTIN_UCOMINEQSD, + + IX86_BUILTIN_MAXPD, + IX86_BUILTIN_MAXSD, + IX86_BUILTIN_MINPD, + IX86_BUILTIN_MINSD, + + IX86_BUILTIN_ANDPD, + IX86_BUILTIN_ANDNPD, + IX86_BUILTIN_ORPD, + IX86_BUILTIN_XORPD, + + IX86_BUILTIN_SQRTPD, + IX86_BUILTIN_SQRTSD, + + IX86_BUILTIN_UNPCKHPD, + IX86_BUILTIN_UNPCKLPD, + + IX86_BUILTIN_SHUFPD, + + IX86_BUILTIN_LOADUPD, + IX86_BUILTIN_STOREUPD, + IX86_BUILTIN_MOVSD, + + IX86_BUILTIN_LOADHPD, + IX86_BUILTIN_LOADLPD, + + IX86_BUILTIN_CVTDQ2PD, + IX86_BUILTIN_CVTDQ2PS, + + IX86_BUILTIN_CVTPD2DQ, + IX86_BUILTIN_CVTPD2PI, + IX86_BUILTIN_CVTPD2PS, + IX86_BUILTIN_CVTTPD2DQ, + IX86_BUILTIN_CVTTPD2PI, + + IX86_BUILTIN_CVTPI2PD, + IX86_BUILTIN_CVTSI2SD, + IX86_BUILTIN_CVTSI642SD, + + IX86_BUILTIN_CVTSD2SI, + IX86_BUILTIN_CVTSD2SI64, + IX86_BUILTIN_CVTSD2SS, + IX86_BUILTIN_CVTSS2SD, + IX86_BUILTIN_CVTTSD2SI, + IX86_BUILTIN_CVTTSD2SI64, + + IX86_BUILTIN_CVTPS2DQ, + IX86_BUILTIN_CVTPS2PD, + IX86_BUILTIN_CVTTPS2DQ, + + IX86_BUILTIN_MOVNTI, + IX86_BUILTIN_MOVNTPD, + IX86_BUILTIN_MOVNTDQ, + + /* SSE2 MMX */ + IX86_BUILTIN_MASKMOVDQU, + IX86_BUILTIN_MOVMSKPD, + IX86_BUILTIN_PMOVMSKB128, + + /* APPLE LOCAL begin 4099020 */ + IX86_BUILTIN_MOVQ, + IX86_BUILTIN_LOADQ, + IX86_BUILTIN_STOREQ, + /* APPLE LOCAL end 4099020 */ + + IX86_BUILTIN_PACKSSWB128, + IX86_BUILTIN_PACKSSDW128, + IX86_BUILTIN_PACKUSWB128, + + IX86_BUILTIN_PADDB128, + IX86_BUILTIN_PADDW128, + IX86_BUILTIN_PADDD128, + IX86_BUILTIN_PADDQ128, + IX86_BUILTIN_PADDSB128, + IX86_BUILTIN_PADDSW128, + IX86_BUILTIN_PADDUSB128, + IX86_BUILTIN_PADDUSW128, + IX86_BUILTIN_PSUBB128, + IX86_BUILTIN_PSUBW128, + IX86_BUILTIN_PSUBD128, + IX86_BUILTIN_PSUBQ128, + IX86_BUILTIN_PSUBSB128, + IX86_BUILTIN_PSUBSW128, + IX86_BUILTIN_PSUBUSB128, + IX86_BUILTIN_PSUBUSW128, 
+ + IX86_BUILTIN_PAND128, + IX86_BUILTIN_PANDN128, + IX86_BUILTIN_POR128, + IX86_BUILTIN_PXOR128, + + IX86_BUILTIN_PAVGB128, + IX86_BUILTIN_PAVGW128, + + IX86_BUILTIN_PCMPEQB128, + IX86_BUILTIN_PCMPEQW128, + IX86_BUILTIN_PCMPEQD128, + IX86_BUILTIN_PCMPGTB128, + IX86_BUILTIN_PCMPGTW128, + IX86_BUILTIN_PCMPGTD128, + + IX86_BUILTIN_PMADDWD128, + + IX86_BUILTIN_PMAXSW128, + IX86_BUILTIN_PMAXUB128, + IX86_BUILTIN_PMINSW128, + IX86_BUILTIN_PMINUB128, + + IX86_BUILTIN_PMULUDQ, + IX86_BUILTIN_PMULUDQ128, + IX86_BUILTIN_PMULHUW128, + IX86_BUILTIN_PMULHW128, + IX86_BUILTIN_PMULLW128, + + IX86_BUILTIN_PSADBW128, + IX86_BUILTIN_PSHUFHW, + IX86_BUILTIN_PSHUFLW, + IX86_BUILTIN_PSHUFD, + + IX86_BUILTIN_PSLLW128, + IX86_BUILTIN_PSLLD128, + IX86_BUILTIN_PSLLQ128, + IX86_BUILTIN_PSRAW128, + IX86_BUILTIN_PSRAD128, + IX86_BUILTIN_PSRLW128, + IX86_BUILTIN_PSRLD128, + IX86_BUILTIN_PSRLQ128, + IX86_BUILTIN_PSLLDQI128, + /* APPLE LOCAL 591583 */ + IX86_BUILTIN_PSLLDQI128_BYTESHIFT, + IX86_BUILTIN_PSLLWI128, + IX86_BUILTIN_PSLLDI128, + IX86_BUILTIN_PSLLQI128, + IX86_BUILTIN_PSRAWI128, + IX86_BUILTIN_PSRADI128, + IX86_BUILTIN_PSRLDQI128, + /* APPLE LOCAL 591583 */ + IX86_BUILTIN_PSRLDQI128_BYTESHIFT, + IX86_BUILTIN_PSRLWI128, + IX86_BUILTIN_PSRLDI128, + IX86_BUILTIN_PSRLQI128, + + IX86_BUILTIN_PUNPCKHBW128, + IX86_BUILTIN_PUNPCKHWD128, + IX86_BUILTIN_PUNPCKHDQ128, + IX86_BUILTIN_PUNPCKHQDQ128, + IX86_BUILTIN_PUNPCKLBW128, + IX86_BUILTIN_PUNPCKLWD128, + IX86_BUILTIN_PUNPCKLDQ128, + IX86_BUILTIN_PUNPCKLQDQ128, + + IX86_BUILTIN_CLFLUSH, + IX86_BUILTIN_MFENCE, + IX86_BUILTIN_LFENCE, + + /* Prescott New Instructions. */ + IX86_BUILTIN_ADDSUBPS, + IX86_BUILTIN_HADDPS, + IX86_BUILTIN_HSUBPS, + IX86_BUILTIN_MOVSHDUP, + IX86_BUILTIN_MOVSLDUP, + IX86_BUILTIN_ADDSUBPD, + IX86_BUILTIN_HADDPD, + IX86_BUILTIN_HSUBPD, + IX86_BUILTIN_LDDQU, + + IX86_BUILTIN_MONITOR, + IX86_BUILTIN_MWAIT, + /* APPLE LOCAL begin mainline */ + /* Merom New Instructions. */ + IX86_BUILTIN_PHADDW, + IX86_BUILTIN_PHADDD, + IX86_BUILTIN_PHADDSW, + IX86_BUILTIN_PHSUBW, + IX86_BUILTIN_PHSUBD, + IX86_BUILTIN_PHSUBSW, + IX86_BUILTIN_PMADDUBSW, + IX86_BUILTIN_PMULHRSW, + IX86_BUILTIN_PSHUFB, + IX86_BUILTIN_PSIGNB, + IX86_BUILTIN_PSIGNW, + IX86_BUILTIN_PSIGND, + IX86_BUILTIN_PALIGNR, + IX86_BUILTIN_PABSB, + IX86_BUILTIN_PABSW, + IX86_BUILTIN_PABSD, + + IX86_BUILTIN_PHADDW128, + IX86_BUILTIN_PHADDD128, + IX86_BUILTIN_PHADDSW128, + IX86_BUILTIN_PHSUBW128, + IX86_BUILTIN_PHSUBD128, + IX86_BUILTIN_PHSUBSW128, + IX86_BUILTIN_PMADDUBSW128, + IX86_BUILTIN_PMULHRSW128, + IX86_BUILTIN_PSHUFB128, + IX86_BUILTIN_PSIGNB128, + IX86_BUILTIN_PSIGNW128, + IX86_BUILTIN_PSIGND128, + IX86_BUILTIN_PALIGNR128, + IX86_BUILTIN_PABSB128, + IX86_BUILTIN_PABSW128, + IX86_BUILTIN_PABSD128, + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* AMDFAM10 - SSE4A New Instructions. */ + IX86_BUILTIN_MOVNTSD, + IX86_BUILTIN_MOVNTSS, + IX86_BUILTIN_EXTRQI, + IX86_BUILTIN_EXTRQ, + IX86_BUILTIN_INSERTQI, + IX86_BUILTIN_INSERTQ, + + /* SSE4.1. 
*/ + IX86_BUILTIN_BLENDPD, + IX86_BUILTIN_BLENDPS, + IX86_BUILTIN_BLENDVPD, + IX86_BUILTIN_BLENDVPS, + IX86_BUILTIN_PBLENDVB128, + IX86_BUILTIN_PBLENDW128, + + IX86_BUILTIN_DPPD, + IX86_BUILTIN_DPPS, + + IX86_BUILTIN_INSERTPS128, + + IX86_BUILTIN_MOVNTDQA, + IX86_BUILTIN_MPSADBW128, + IX86_BUILTIN_PACKUSDW128, + IX86_BUILTIN_PCMPEQQ, + IX86_BUILTIN_PHMINPOSUW128, + + IX86_BUILTIN_PMAXSB128, + IX86_BUILTIN_PMAXSD128, + IX86_BUILTIN_PMAXUD128, + IX86_BUILTIN_PMAXUW128, + + IX86_BUILTIN_PMINSB128, + IX86_BUILTIN_PMINSD128, + IX86_BUILTIN_PMINUD128, + IX86_BUILTIN_PMINUW128, + + IX86_BUILTIN_PMOVSXBW128, + IX86_BUILTIN_PMOVSXBD128, + IX86_BUILTIN_PMOVSXBQ128, + IX86_BUILTIN_PMOVSXWD128, + IX86_BUILTIN_PMOVSXWQ128, + IX86_BUILTIN_PMOVSXDQ128, + + IX86_BUILTIN_PMOVZXBW128, + IX86_BUILTIN_PMOVZXBD128, + IX86_BUILTIN_PMOVZXBQ128, + IX86_BUILTIN_PMOVZXWD128, + IX86_BUILTIN_PMOVZXWQ128, + IX86_BUILTIN_PMOVZXDQ128, + + IX86_BUILTIN_PMULDQ128, + IX86_BUILTIN_PMULLD128, + + IX86_BUILTIN_ROUNDPD, + IX86_BUILTIN_ROUNDPS, + IX86_BUILTIN_ROUNDSD, + IX86_BUILTIN_ROUNDSS, + + IX86_BUILTIN_PTESTZ, + IX86_BUILTIN_PTESTC, + IX86_BUILTIN_PTESTNZC, + /* APPLE LOCAL end 5612787 mainline sse4 */ + /* APPLE LOCAL end mainline */ + IX86_BUILTIN_VEC_INIT_V2SI, + IX86_BUILTIN_VEC_INIT_V4HI, + IX86_BUILTIN_VEC_INIT_V8QI, + IX86_BUILTIN_VEC_EXT_V2DF, + IX86_BUILTIN_VEC_EXT_V2DI, + IX86_BUILTIN_VEC_EXT_V4SF, + IX86_BUILTIN_VEC_EXT_V4SI, + IX86_BUILTIN_VEC_EXT_V8HI, + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* deletion */ + /* APPLE LOCAL end 5612787 mainline sse4 */ + IX86_BUILTIN_VEC_EXT_V2SI, + IX86_BUILTIN_VEC_EXT_V4HI, + /* APPLE LOCAL begin 5612787 mainline sse4 */ + IX86_BUILTIN_VEC_EXT_V16QI, + IX86_BUILTIN_VEC_SET_V2DI, + IX86_BUILTIN_VEC_SET_V4SF, + IX86_BUILTIN_VEC_SET_V4SI, + /* APPLE LOCAL end 5612787 mainline sse4 */ + IX86_BUILTIN_VEC_SET_V8HI, + IX86_BUILTIN_VEC_SET_V4HI, + /* APPLE LOCAL begin 5612787 mainline sse4 */ + IX86_BUILTIN_VEC_SET_V16QI, + + IX86_BUILTIN_VEC_PACK_SFIX, + + /* SSE4.2. */ + IX86_BUILTIN_CRC32QI, + IX86_BUILTIN_CRC32HI, + IX86_BUILTIN_CRC32SI, + IX86_BUILTIN_CRC32DI, + + IX86_BUILTIN_PCMPESTRI128, + IX86_BUILTIN_PCMPESTRM128, + IX86_BUILTIN_PCMPESTRA128, + IX86_BUILTIN_PCMPESTRC128, + IX86_BUILTIN_PCMPESTRO128, + IX86_BUILTIN_PCMPESTRS128, + IX86_BUILTIN_PCMPESTRZ128, + IX86_BUILTIN_PCMPISTRI128, + IX86_BUILTIN_PCMPISTRM128, + IX86_BUILTIN_PCMPISTRA128, + IX86_BUILTIN_PCMPISTRC128, + IX86_BUILTIN_PCMPISTRO128, + IX86_BUILTIN_PCMPISTRS128, + IX86_BUILTIN_PCMPISTRZ128, + + IX86_BUILTIN_PCMPGTQ, + + /* TFmode support builtins. */ + IX86_BUILTIN_INFQ, + IX86_BUILTIN_FABSQ, + IX86_BUILTIN_COPYSIGNQ, + /* APPLE LOCAL end 5612787 mainline sse4 */ + + IX86_BUILTIN_MAX +}; + +#define def_builtin(MASK, NAME, TYPE, CODE) \ +do { \ + if ((MASK) & target_flags \ + && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ + lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ +} while (0) + +/* APPLE LOCAL begin 5612787 mainline sse4 */ +/* Like def_builtin, but also marks the function decl "const". */ + +static inline tree +def_builtin_const (int mask, const char *name, tree type, + enum ix86_builtins code) +{ + tree decl = NULL_TREE; + if ((mask) & target_flags + && (!((mask) & MASK_64BIT) || TARGET_64BIT)) + decl = lang_hooks.builtin_function (name, type, code, BUILT_IN_MD, + NULL, NULL_TREE); + + if (decl) + TREE_READONLY (decl) = 1; + return decl; +} +/* APPLE LOCAL end 5612787 mainline sse4 */ + +/* Bits for builtin_description.flag. 
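+   E.g. the comparison tables below mark cmpgtps with LT plus
+   BUILTIN_DESC_SWAP_OPERANDS, synthesizing the greater-than compare
+   from cmpltps with its two operands reversed.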
*/ + +/* Set when we don't support the comparison natively, and should + swap_comparison in order to support it. */ +#define BUILTIN_DESC_SWAP_OPERANDS 1 + +struct builtin_description +{ + const unsigned int mask; + const enum insn_code icode; + const char *const name; + const enum ix86_builtins code; + const enum rtx_code comparison; + const unsigned int flag; +}; + +/* APPLE LOCAL begin 4299257 */ +static const struct builtin_description bdesc_comi[] = +{ + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, +}; +static const struct builtin_description bdesc_ucomi[] = +{ + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, +}; +/* APPLE LOCAL end 4299257 */ + +/* APPLE LOCAL begin 5612787 mainline sse4 */ +static const struct builtin_description bdesc_ptest[] = +{ + /* SSE4.1 */ + { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 }, +}; + +static const struct builtin_description bdesc_pcmpestr[] = +{ + /* SSE4.2 */ + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", 
IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode }, +}; + +static const struct builtin_description bdesc_pcmpistr[] = +{ + /* SSE4.2 */ + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode }, + { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode }, +}; + +static const struct builtin_description bdesc_crc32[] = +{ + /* SSE4.2 */ + { MASK_SSE4_2 | MASK_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 }, + { MASK_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 }, +}; + +/* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. 
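+   E.g. __builtin_ia32_blendps takes an 8-bit immediate lane selector
+   as its third argument, while __builtin_ia32_blendvps instead takes
+   a mask vector that the blendvps instruction requires in xmm0.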
*/ +static const struct builtin_description bdesc_sse_3arg[] = +{ + /* SSE4.1 */ + { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 }, +}; +/* APPLE LOCAL end 5612787 mainline sse4 */ + +static const struct builtin_description bdesc_2arg[] = +{ + /* SSE */ + { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, + { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, + { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, + { 
MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, + + { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, + { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, + { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, + + /* MMX */ + { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + { MASK_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, + { MASK_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, + + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, + + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, + + /* Special. 
*/ + { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv4hi2si, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv2si2si, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv1di3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv1di2si, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv4hi2si, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv2si2si, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv1di3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv1di2si, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, + + { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv4hi2si, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv2si2si, 0, IX86_BUILTIN_PSRADI, 0, 0 }, + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, + + /* SSE2 */ + { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, + + { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, + + /* SSE2 MMX */ + { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, + + { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, + { MASK_MMX, 
CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, + /* APPLE LOCAL 5612787 mainline sse4 */ + { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, + { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, + { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, + + /* APPLE LOCAL 5612787 mainline sse4 */ + { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, + + { MASK_SSE2, 
CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, + + /* SSE3 MMX */ + { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, + /* APPLE LOCAL begin mainline */ + { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }, + + /* SSSE3 MMX */ + { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 }, + { 
MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 }, + /* APPLE LOCAL 5612787 mainline sse4 */ + { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }, + /* APPLE LOCAL end mainline */ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* SSE4.1 */ + { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 }, + + /* SSE4.2 */ + { MASK_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 }, + /* APPLE LOCAL end 5612787 mainline sse4 */ +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, 
IX86_BUILTIN_CVTPD2PS, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, + + /* SSE3 */ + { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, + /* APPLE LOCAL begin mainline */ + + /* SSSE3 */ + { MASK_SSSE3, CODE_FOR_ssse3_pabsv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pabsv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 }, + /* APPLE LOCAL 5612787 mainline sse4 */ + { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }, + /* APPLE LOCAL end mainline */ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* SSE4.1 */ + { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 }, + + /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. 
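*/

/* Editorial sketch, not part of the patch: a user-level view of the
   roundpd/roundps builtins registered just below.  The second operand is
   the 4-bit rounding-control immediate the expander checks for; a
   non-constant argument is rejected.  _mm_round_pd and the _MM_FROUND_*
   controls are the standard <smmintrin.h> names; compile with -msse4.1.  */
#include <smmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128d v = _mm_set_pd (2.5, -1.25);
  __m128d down = _mm_round_pd (v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
  double out[2];
  _mm_storeu_pd (out, down);
  printf ("%f %f\n", out[0], out[1]);   /* -2.000000 2.000000 */
  return 0;
}

/*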
*/ + { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 }, + { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 }, + /* APPLE LOCAL end 5612787 mainline sse4 */ +}; + +static void +ix86_init_builtins (void) +{ + if (TARGET_MMX) + ix86_init_mmx_sse_builtins (); + + /* APPLE LOCAL begin constant cfstrings */ +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif + /* APPLE LOCAL end constant cfstrings */ +} + +/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX + is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX + builtins. */ +static void +ix86_init_mmx_sse_builtins (void) +{ + const struct builtin_description * d; + size_t i; + + tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode); + tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); + tree V2DI_type_node + = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); + tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); + tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); + tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); + tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); + tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); + tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + tree V1DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); + + tree pchar_type_node = build_pointer_type (char_type_node); + tree pcchar_type_node = build_pointer_type ( + build_type_variant (char_type_node, 1, 0)); + tree pfloat_type_node = build_pointer_type (float_type_node); + tree pcfloat_type_node = build_pointer_type ( + build_type_variant (float_type_node, 1, 0)); + tree pv2si_type_node = build_pointer_type (V2SI_type_node); + tree pv2di_type_node = build_pointer_type (V2DI_type_node); + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + tree pv1di_type_node = build_pointer_type (V1DI_type_node); + + /* Comparisons. */ + tree int_ftype_v4sf_v4sf + = build_function_type_list (integer_type_node, + V4SF_type_node, V4SF_type_node, NULL_TREE); + tree v4si_ftype_v4sf_v4sf + = build_function_type_list (V4SI_type_node, + V4SF_type_node, V4SF_type_node, NULL_TREE); + /* MMX/SSE/integer conversions. */ + tree int_ftype_v4sf + = build_function_type_list (integer_type_node, + V4SF_type_node, NULL_TREE); + tree int64_ftype_v4sf + = build_function_type_list (long_long_integer_type_node, + V4SF_type_node, NULL_TREE); + tree int_ftype_v8qi + = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_int + = build_function_type_list (V4SF_type_node, + V4SF_type_node, integer_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_int64 + = build_function_type_list (V4SF_type_node, + V4SF_type_node, long_long_integer_type_node, + NULL_TREE); + tree v4sf_ftype_v4sf_v2si + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V2SI_type_node, NULL_TREE); + + /* Miscellaneous. 
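*/

/* Editorial note, not part of the patch: each "<ret>_ftype_<args>" tree
   built in this function is a builtin signature in GCC's type
   representation.  int_ftype_v4sf above, for instance, is the shape of
   the truncating convert exposed through <xmmintrin.h>; a minimal
   sketch, compiled with -msse.  */
#include <xmmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128 v = _mm_set_ss (3.9f);
  int i = _mm_cvttss_si32 (v);   /* int (__m128): an "int_ftype_v4sf" */
  printf ("%d\n", i);            /* 3: cvttss2si truncates toward zero */
  return 0;
}

/*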
*/ + tree v8qi_ftype_v4hi_v4hi + = build_function_type_list (V8QI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v4hi_ftype_v2si_v2si + = build_function_type_list (V4HI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_v4sf_int + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V4SF_type_node, + integer_type_node, NULL_TREE); + tree v2si_ftype_v4hi_v4hi + = build_function_type_list (V2SI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_int + = build_function_type_list (V4HI_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + tree v4hi_ftype_v4hi_v1di + = build_function_type_list (V4HI_type_node, + V4HI_type_node, V1DI_type_node, + NULL_TREE); + tree v2si_ftype_v2si_int + = build_function_type_list (V2SI_type_node, + V2SI_type_node, integer_type_node, NULL_TREE); + tree v2si_ftype_v2si_v1di + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V1DI_type_node, NULL_TREE); + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + + tree void_ftype_void + = build_function_type (void_type_node, void_list_node); + tree void_ftype_unsigned + = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); + tree void_ftype_unsigned_unsigned + = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, NULL_TREE); + tree void_ftype_pcvoid_unsigned_unsigned + = build_function_type_list (void_type_node, const_ptr_type_node, + unsigned_type_node, unsigned_type_node, + NULL_TREE); + tree unsigned_ftype_void + = build_function_type (unsigned_type_node, void_list_node); + tree v2si_ftype_v4sf + = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); + /* Loads/stores. */ + tree void_ftype_v8qi_v8qi_pchar + = build_function_type_list (void_type_node, + V8QI_type_node, V8QI_type_node, + pchar_type_node, NULL_TREE); + tree v4sf_ftype_pcfloat + = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); + /* @@@ the type is bogus */ + tree v4sf_ftype_v4sf_pv2si + = build_function_type_list (V4SF_type_node, + V4SF_type_node, pv2si_type_node, NULL_TREE); + tree void_ftype_pv2si_v4sf + = build_function_type_list (void_type_node, + pv2si_type_node, V4SF_type_node, NULL_TREE); + tree void_ftype_pfloat_v4sf + = build_function_type_list (void_type_node, + pfloat_type_node, V4SF_type_node, NULL_TREE); + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + tree void_ftype_pv1di_v1di + = build_function_type_list (void_type_node, + pv1di_type_node, V1DI_type_node, NULL_TREE); + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + tree void_ftype_pv2di_v2di + = build_function_type_list (void_type_node, + pv2di_type_node, V2DI_type_node, NULL_TREE); + /* Normal vector unops. */ + tree v4sf_ftype_v4sf + = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); + /* APPLE LOCAL begin mainline */ + tree v16qi_ftype_v16qi + = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v8hi_ftype_v8hi + = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); + tree v4si_ftype_v4si + = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); + tree v8qi_ftype_v8qi + = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi + = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); + /* APPLE LOCAL end mainline */ + + /* Normal vector binops. 
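*/

/* Editorial sketch, not part of the patch: the APPLE LOCAL 4656532
   change models __m64 as a one-element V1DImode vector, so a signature
   such as v1di_ftype_v1di_int below is the __m64 shift-by-immediate
   from <mmintrin.h>.  Compile with -mmmx.  */
#include <mmintrin.h>
#include <stdio.h>

int main (void)
{
  __m64 v = _mm_cvtsi32_si64 (1);         /* low 32 bits = 1 */
  __m64 s = _mm_slli_si64 (v, 8);         /* __m64 (__m64, int) */
  printf ("%d\n", _mm_cvtsi64_si32 (s));  /* 256 */
  _mm_empty ();                           /* restore x87 state after MMX */
  return 0;
}

/*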
*/ + tree v4sf_ftype_v4sf_v4sf + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V4SF_type_node, NULL_TREE); + tree v8qi_ftype_v8qi_v8qi + = build_function_type_list (V8QI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_v4hi + = build_function_type_list (V4HI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v2si_ftype_v2si_v2si + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + tree v1di_ftype_v1di_v1di + = build_function_type_list (V1DI_type_node, + V1DI_type_node, V1DI_type_node, NULL_TREE); + /* APPLE LOCAL begin 4684674 */ + tree v1di_ftype_v1di_int + = build_function_type_list (V1DI_type_node, + V1DI_type_node, integer_type_node, NULL_TREE); + /* APPLE LOCAL end 4684674 */ + /* APPLE LOCAL begin 4656532 */ + tree v1di_ftype_v1di_v1di_int + = build_function_type_list (V1DI_type_node, + V1DI_type_node, + V1DI_type_node, + integer_type_node, NULL_TREE); + /* APPLE LOCAL end 4656532 */ + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + + tree v2si_ftype_v2sf + = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); + tree v2sf_ftype_v2si + = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); + tree v2si_ftype_v2si + = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); + tree v2sf_ftype_v2sf + = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); + tree v2sf_ftype_v2sf_v2sf + = build_function_type_list (V2SF_type_node, + V2SF_type_node, V2SF_type_node, NULL_TREE); + tree v2si_ftype_v2sf_v2sf + = build_function_type_list (V2SI_type_node, + V2SF_type_node, V2SF_type_node, NULL_TREE); + tree pint_type_node = build_pointer_type (integer_type_node); + tree pdouble_type_node = build_pointer_type (double_type_node); + tree pcdouble_type_node = build_pointer_type ( + build_type_variant (double_type_node, 1, 0)); + tree int_ftype_v2df_v2df + = build_function_type_list (integer_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); + + tree void_ftype_pcvoid + = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); + tree v4sf_ftype_v4si + = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); + tree v4si_ftype_v4sf + = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); + tree v2df_ftype_v4si + = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); + tree v4si_ftype_v2df + = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); + tree v2si_ftype_v2df + = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); + tree v4sf_ftype_v2df + = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2si + = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); + tree v2df_ftype_v4sf + = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); + tree int_ftype_v2df + = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); + tree int64_ftype_v2df + = build_function_type_list (long_long_integer_type_node, + V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2df_int + = build_function_type_list (V2DF_type_node, + V2DF_type_node, integer_type_node, NULL_TREE); + tree v2df_ftype_v2df_int64 + = build_function_type_list (V2DF_type_node, + V2DF_type_node, long_long_integer_type_node, + NULL_TREE); + tree v4sf_ftype_v4sf_v2df + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V2DF_type_node, 
NULL_TREE); + tree v2df_ftype_v2df_v4sf + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V4SF_type_node, NULL_TREE); + tree v2df_ftype_v2df_v2df_int + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V2DF_type_node, + integer_type_node, + NULL_TREE); + tree v2df_ftype_v2df_pcdouble + = build_function_type_list (V2DF_type_node, + V2DF_type_node, pcdouble_type_node, NULL_TREE); + tree void_ftype_pdouble_v2df + = build_function_type_list (void_type_node, + pdouble_type_node, V2DF_type_node, NULL_TREE); + tree void_ftype_pint_int + = build_function_type_list (void_type_node, + pint_type_node, integer_type_node, NULL_TREE); + tree void_ftype_v16qi_v16qi_pchar + = build_function_type_list (void_type_node, + V16QI_type_node, V16QI_type_node, + pchar_type_node, NULL_TREE); + tree v2df_ftype_pcdouble + = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); + tree v2df_ftype_v2df_v2df + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v16qi_ftype_v16qi_v16qi + = build_function_type_list (V16QI_type_node, + V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v8hi_ftype_v8hi_v8hi + = build_function_type_list (V8HI_type_node, + V8HI_type_node, V8HI_type_node, NULL_TREE); + tree v4si_ftype_v4si_v4si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, V4SI_type_node, NULL_TREE); + tree v2di_ftype_v2di_v2di + = build_function_type_list (V2DI_type_node, + V2DI_type_node, V2DI_type_node, NULL_TREE); + tree v2di_ftype_v2df_v2df + = build_function_type_list (V2DI_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2df_ftype_v2df + = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2di_ftype_v2di_int + = build_function_type_list (V2DI_type_node, + V2DI_type_node, integer_type_node, NULL_TREE); + /* APPLE LOCAL begin mainline */ + tree v2di_ftype_v2di_v2di_int + = build_function_type_list (V2DI_type_node, V2DI_type_node, + V2DI_type_node, integer_type_node, NULL_TREE); + /* APPLE LOCAL end mainline */ + tree v4si_ftype_v4si_int + = build_function_type_list (V4SI_type_node, + V4SI_type_node, integer_type_node, NULL_TREE); + tree v8hi_ftype_v8hi_int + = build_function_type_list (V8HI_type_node, + V8HI_type_node, integer_type_node, NULL_TREE); + tree v4si_ftype_v8hi_v8hi + = build_function_type_list (V4SI_type_node, + V8HI_type_node, V8HI_type_node, NULL_TREE); + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + tree v1di_ftype_v8qi_v8qi + = build_function_type_list (V1DI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v1di_ftype_v2si_v2si + = build_function_type_list (V1DI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + /* APPLE LOCAL end 5612787 mainline sse4 */ + + tree v2di_ftype_v16qi_v16qi + = build_function_type_list (V2DI_type_node, + V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v2di_ftype_v4si_v4si + = build_function_type_list (V2DI_type_node, + V4SI_type_node, V4SI_type_node, NULL_TREE); + tree int_ftype_v16qi + = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); + tree v16qi_ftype_pcchar + = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); + tree void_ftype_pchar_v16qi + = build_function_type_list (void_type_node, + pchar_type_node, V16QI_type_node, NULL_TREE); + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + tree v2di_ftype_v2di_unsigned_unsigned + = 
build_function_type_list (V2DI_type_node, V2DI_type_node, + unsigned_type_node, unsigned_type_node, + NULL_TREE); + tree v2di_ftype_v2di_v2di_unsigned_unsigned + = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, + unsigned_type_node, unsigned_type_node, + NULL_TREE); + tree v2di_ftype_v2di_v16qi + = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, + NULL_TREE); + tree v2df_ftype_v2df_v2df_v2df + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V2DF_type_node, + V2DF_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_v4sf_v4sf + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V4SF_type_node, + V4SF_type_node, NULL_TREE); + tree v8hi_ftype_v16qi + = build_function_type_list (V8HI_type_node, V16QI_type_node, + NULL_TREE); + tree v4si_ftype_v16qi + = build_function_type_list (V4SI_type_node, V16QI_type_node, + NULL_TREE); + tree v2di_ftype_v16qi + = build_function_type_list (V2DI_type_node, V16QI_type_node, + NULL_TREE); + tree v4si_ftype_v8hi + = build_function_type_list (V4SI_type_node, V8HI_type_node, + NULL_TREE); + tree v2di_ftype_v8hi + = build_function_type_list (V2DI_type_node, V8HI_type_node, + NULL_TREE); + tree v2di_ftype_v4si + = build_function_type_list (V2DI_type_node, V4SI_type_node, + NULL_TREE); + tree v2di_ftype_pv2di + = build_function_type_list (V2DI_type_node, pv2di_type_node, + NULL_TREE); + tree v16qi_ftype_v16qi_v16qi_int + = build_function_type_list (V16QI_type_node, V16QI_type_node, + V16QI_type_node, integer_type_node, + NULL_TREE); + tree v16qi_ftype_v16qi_v16qi_v16qi + = build_function_type_list (V16QI_type_node, V16QI_type_node, + V16QI_type_node, V16QI_type_node, + NULL_TREE); + tree v8hi_ftype_v8hi_v8hi_int + = build_function_type_list (V8HI_type_node, V8HI_type_node, + V8HI_type_node, integer_type_node, + NULL_TREE); + tree v4si_ftype_v4si_v4si_int + = build_function_type_list (V4SI_type_node, V4SI_type_node, + V4SI_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_v2di_v2di + = build_function_type_list (integer_type_node, + V2DI_type_node, V2DI_type_node, + NULL_TREE); + tree int_ftype_v16qi_int_v16qi_int_int + = build_function_type_list (integer_type_node, + V16QI_type_node, + integer_type_node, + V16QI_type_node, + integer_type_node, + integer_type_node, + NULL_TREE); + tree v16qi_ftype_v16qi_int_v16qi_int_int + = build_function_type_list (V16QI_type_node, + V16QI_type_node, + integer_type_node, + V16QI_type_node, + integer_type_node, + integer_type_node, + NULL_TREE); + tree int_ftype_v16qi_v16qi_int + = build_function_type_list (integer_type_node, + V16QI_type_node, + V16QI_type_node, + integer_type_node, + NULL_TREE); + /* APPLE LOCAL end 5612787 mainline sse4 */ + + tree float80_type; + tree float128_type; + tree ftype; + + /* The __float80 type. */ + if (TYPE_MODE (long_double_type_node) == XFmode) + (*lang_hooks.types.register_builtin_type) (long_double_type_node, + "__float80"); + else + { + /* The __float80 type. */ + float80_type = make_node (REAL_TYPE); + TYPE_PRECISION (float80_type) = 80; + layout_type (float80_type); + (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); + } + + if (TARGET_64BIT) + { + float128_type = make_node (REAL_TYPE); + TYPE_PRECISION (float128_type) = 128; + layout_type (float128_type); + (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); + } + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* Add all SSE builtins that are more or less simple operations on + three operands. 
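*/

/* Editorial sketch, not part of the patch: one table entry handled by
   the loop that follows.  blendps takes two vectors plus an 8-bit
   selector immediate, matching v4sf_ftype_v4sf_v4sf_int; bit i of the
   immediate picks element i from the second source.  _mm_blend_ps is
   the standard <smmintrin.h> name; compile with -msse4.1.  */
#include <smmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128 a = _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f);
  __m128 b = _mm_set_ps (30.0f, 20.0f, 10.0f, 0.5f);
  __m128 r = _mm_blend_ps (a, b, 0x5);   /* elements 0 and 2 from b */
  float out[4];
  _mm_storeu_ps (out, r);
  printf ("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0.5 1 20 3 */
  return 0;
}

/*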
*/ + for (i = 0, d = bdesc_sse_3arg; + i < ARRAY_SIZE (bdesc_sse_3arg); + i++, d++) + { + /* Use one of the operands; the target can have a different mode for + mask-generating compares. */ + enum machine_mode mode; + tree type; + + if (d->name == 0) + continue; + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V16QImode: + type = v16qi_ftype_v16qi_v16qi_int; + break; + case V8HImode: + type = v8hi_ftype_v8hi_v8hi_int; + break; + case V4SImode: + type = v4si_ftype_v4si_v4si_int; + break; + case V2DImode: + type = v2di_ftype_v2di_v2di_int; + break; + case V2DFmode: + type = v2df_ftype_v2df_v2df_int; + break; + case V4SFmode: + type = v4sf_ftype_v4sf_v4sf_int; + break; + default: + gcc_unreachable (); + } + + /* Override for variable blends. */ + switch (d->icode) + { + case CODE_FOR_sse4_1_blendvpd: + type = v2df_ftype_v2df_v2df_v2df; + break; + case CODE_FOR_sse4_1_blendvps: + type = v4sf_ftype_v4sf_v4sf_v4sf; + break; + case CODE_FOR_sse4_1_pblendvb: + type = v16qi_ftype_v16qi_v16qi_v16qi; + break; + default: + break; + } + + def_builtin (d->mask, d->name, type, d->code); + } + /* APPLE LOCAL end 5612787 mainline sse4 */ + + /* Add all builtins that are more or less simple operations on two + operands. */ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + { + /* Use one of the operands; the target can have a different mode for + mask-generating compares. */ + enum machine_mode mode; + tree type; + + if (d->name == 0) + continue; + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V16QImode: + type = v16qi_ftype_v16qi_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi_v8hi; + break; + case V4SImode: + type = v4si_ftype_v4si_v4si; + break; + case V2DImode: + type = v2di_ftype_v2di_v2di; + break; + case V2DFmode: + type = v2df_ftype_v2df_v2df; + break; + case V4SFmode: + type = v4sf_ftype_v4sf_v4sf; + break; + case V8QImode: + type = v8qi_ftype_v8qi_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si_v2si; + break; + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + case V1DImode: + type = v1di_ftype_v1di_v1di; + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + break; + + default: + gcc_unreachable (); + } + + /* Override for comparisons. */ + if (d->icode == CODE_FOR_sse_maskcmpv4sf3 + || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) + type = v4si_ftype_v4sf_v4sf; + + if (d->icode == CODE_FOR_sse2_maskcmpv2df3 + || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) + type = v2di_ftype_v2df_v2df; + + def_builtin (d->mask, d->name, type, d->code); + } + /* APPLE LOCAL begin mainline */ + /* Add all builtins that are more or less simple operations on 1 operand. 
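*/

/* Editorial sketch, not part of the patch: the "variable blend"
   override in the three-operand loop above.  blendvpd replaces the
   immediate with a third vector operand (implicitly xmm0), i.e.
   v2df_ftype_v2df_v2df_v2df; the mask's per-lane sign bits select from
   the second source.  From <smmintrin.h>, compile with -msse4.1.  */
#include <smmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128d a = _mm_set_pd (1.0, 2.0);
  __m128d b = _mm_set_pd (-1.0, -2.0);
  __m128d mask = _mm_set_pd (-0.0, 0.0);   /* sign bit set in lane 1 only */
  double out[2];
  _mm_storeu_pd (out, _mm_blendv_pd (a, b, mask));
  printf ("%g %g\n", out[0], out[1]);      /* 2 -1 */
  return 0;
}

/*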
*/ + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + { + enum machine_mode mode; + tree type; + + if (d->name == 0) + continue; + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V16QImode: + type = v16qi_ftype_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi; + break; + case V4SImode: + type = v4si_ftype_v4si; + break; + case V2DFmode: + type = v2df_ftype_v2df; + break; + case V4SFmode: + type = v4sf_ftype_v4sf; + break; + case V8QImode: + type = v8qi_ftype_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si; + break; + + default: + abort (); + } + + def_builtin (d->mask, d->name, type, d->code); + } + /* APPLE LOCAL end mainline */ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* pcmpestr[im] insns. */ + for (i = 0, d = bdesc_pcmpestr; + i < ARRAY_SIZE (bdesc_pcmpestr); + i++, d++) + { + if (d->code == IX86_BUILTIN_PCMPESTRM128) + ftype = v16qi_ftype_v16qi_int_v16qi_int_int; + else + ftype = int_ftype_v16qi_int_v16qi_int_int; + def_builtin (d->mask, d->name, ftype, d->code); + } + + /* pcmpistr[im] insns. */ + for (i = 0, d = bdesc_pcmpistr; + i < ARRAY_SIZE (bdesc_pcmpistr); + i++, d++) + { + if (d->code == IX86_BUILTIN_PCMPISTRM128) + ftype = v16qi_ftype_v16qi_v16qi_int; + else + ftype = int_ftype_v16qi_v16qi_int; + def_builtin (d->mask, d->name, ftype, d->code); + } + /* APPLE LOCAL end 5612787 mainline sse4 */ + /* Add the remaining MMX insns with somewhat more complicated types. */ + def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSLLW); + def_builtin (MASK_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI); + def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSLLD); + def_builtin (MASK_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI); + def_builtin (MASK_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ); + def_builtin (MASK_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI); + + def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRLW); + def_builtin (MASK_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI); + def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRLD); + def_builtin (MASK_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI); + def_builtin (MASK_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ); + def_builtin (MASK_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI); + + def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRAW); + def_builtin (MASK_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI); + def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRAD); + def_builtin (MASK_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI); + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); + def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); + + /* APPLE LOCAL 4299257 */ + /* comi insns. 
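*/

/* Editorial sketch, not part of the patch: a user-level view of the
   pcmpistr[im] builtins registered in the loop above.  _mm_cmpistri
   and the _SIDD_* control flags are the standard SSE4.2 names from
   <nmmintrin.h>; compile with -msse4.2.  */
#include <nmmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128i hay = _mm_loadu_si128 ((const __m128i *) "find the needle.");
  __m128i pin = _mm_loadu_si128 ((const __m128i *) "needle\0\0\0\0\0\0\0\0\0");
  /* Ordered compare: index of the first occurrence of the substring.  */
  int idx = _mm_cmpistri (hay, pin,
                          _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED);
  printf ("%d\n", idx);   /* 9: "needle" starts at offset 9 */
  return 0;
}

/*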
*/ + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) + if (d->mask == MASK_SSE2) + def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); + else + def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); + + /* APPLE LOCAL begin 4299257 */ + /* ucomi insns. */ + for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++) + if (d->mask == MASK_SSE2) + def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); + else + def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); + /* APPLE LOCAL end 4299257 */ + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* ptest insns. */ + for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++) + def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code); + /* APPLE LOCAL end 5612787 mainline sse4 */ + + def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); + def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); + def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); + + def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); + def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); + def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); + def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); + def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); + def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); + def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); + def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); + + def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); + + def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); + def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); + def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); + + def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); + def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pv1di_v1di, IX86_BUILTIN_MOVNTQ); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); + + def_builtin (MASK_SSE | MASK_3DNOW_A, 
"__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + + def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); + def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); + def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); + def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); + def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); + def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); + + def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); + + /* Original 3DNow! */ + def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); + def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); + def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); + def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); + def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); + def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); + + /* 3DNow! extension as used in the Athlon CPU. 
*/ + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); + def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); + + /* SSE2 */ + def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); + + def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); + def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); + def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); + def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); + def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); + def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); + def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); + def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); + + def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); + def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); + + def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); + 
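/* Editorial sketch, not part of the patch: the cvt vs. cvtt distinction
   among the SSE2 conversion builtins defined here.  cvtpd2dq rounds
   under the current rounding mode; cvttpd2dq always truncates toward
   zero.  From <emmintrin.h>, compile with -msse2.  */
#include <emmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128d v = _mm_set_pd (2.7, -1.5);
  int r[4], t[4];
  _mm_storeu_si128 ((__m128i *) r, _mm_cvtpd_epi32 (v));   /* round */
  _mm_storeu_si128 ((__m128i *) t, _mm_cvttpd_epi32 (v));  /* truncate */
  printf ("%d %d / %d %d\n", r[0], r[1], t[0], t[1]);      /* -2 3 / -1 2 */
  return 0;
}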
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); + + def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); + def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); + def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); + + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); + + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); + def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); + def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); + + def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); + /* APPLE LOCAL 5919583 */ + def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128_BYTESHIFT); + def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); + def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); + /* APPLE LOCAL 5919583 */ + def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128_BYTESHIFT); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); + + def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); + def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); + + def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); + + /* 
Prescott New Instructions. */ + def_builtin (MASK_SSE3, "__builtin_ia32_monitor", + void_ftype_pcvoid_unsigned_unsigned, + IX86_BUILTIN_MONITOR); + def_builtin (MASK_SSE3, "__builtin_ia32_mwait", + void_ftype_unsigned_unsigned, + IX86_BUILTIN_MWAIT); + def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSHDUP); + def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSLDUP); + def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", + v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + + /* APPLE LOCAL begin 4099020 */ + ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_movqv4si", ftype, IX86_BUILTIN_MOVQ); + ftype = build_function_type_list (V4SI_type_node, pv2si_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_loadlv4si", ftype, IX86_BUILTIN_LOADQ); + ftype = build_function_type_list (void_type_node, pv2si_type_node, V4SI_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_storelv4si", ftype, IX86_BUILTIN_STOREQ); + /* APPLE LOCAL end 4099020 */ + + /* APPLE LOCAL begin 4656532 */ + /* Merom New Instructions. */ + def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", + v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); + def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", v1di_ftype_v1di_v1di_int, + IX86_BUILTIN_PALIGNR); + + /* APPLE LOCAL end 4656532 */ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* SSE4.1. */ + def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128); + def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128); + def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD); + def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS); + def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD); + def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS); + + /* SSE4.2. 
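*/

/* Editorial sketch, not part of the patch: a user-level view of the
   crc32 builtins defined just below; each width variant folds more
   input bytes into the running CRC-32C value.  _mm_crc32_u8 is the
   standard <nmmintrin.h> name; compile with -msse4.2.  */
#include <nmmintrin.h>
#include <stdio.h>
#include <string.h>

int main (void)
{
  const char *msg = "123456789";
  unsigned int crc = 0xffffffffu;
  size_t i;
  for (i = 0; i < strlen (msg); i++)
    crc = _mm_crc32_u8 (crc, (unsigned char) msg[i]);
  printf ("%08x\n", crc ^ 0xffffffffu);  /* e3069283: CRC-32C check value */
  return 0;
}

/*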
*/ + ftype = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_char_type_node, + NULL_TREE); + def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI); + ftype = build_function_type_list (unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI); + ftype = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI); + ftype = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI); + + /* AMDFAM10 SSE4A New built-ins */ + def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); + def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); + def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI); + def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ); + def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI); + def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ); + /* APPLE LOCAL end 5612787 mainline sse4 */ + /* Access to the vec_init patterns. */ + ftype = build_function_type_list (V2SI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", + ftype, IX86_BUILTIN_VEC_INIT_V2SI); + + ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, + short_integer_type_node, + short_integer_type_node, + short_integer_type_node, NULL_TREE); + def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", + ftype, IX86_BUILTIN_VEC_INIT_V4HI); + + ftype = build_function_type_list (V8QI_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, NULL_TREE); + def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", + ftype, IX86_BUILTIN_VEC_INIT_V8QI); + + /* Access to the vec_extract patterns. */ + ftype = build_function_type_list (double_type_node, V2DF_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", + ftype, IX86_BUILTIN_VEC_EXT_V2DF); + + ftype = build_function_type_list (long_long_integer_type_node, + V2DI_type_node, integer_type_node, + NULL_TREE); + def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", + ftype, IX86_BUILTIN_VEC_EXT_V2DI); + + ftype = build_function_type_list (float_type_node, V4SF_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", + ftype, IX86_BUILTIN_VEC_EXT_V4SF); + + ftype = build_function_type_list (intSI_type_node, V4SI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", + ftype, IX86_BUILTIN_VEC_EXT_V4SI); + + /* APPLE LOCAL begin radar 4469713 */ + /* The return type of the builtin function should be an unsigned instead + of a signed type. 
*/ + ftype = build_function_type_list (unsigned_intHI_type_node, V8HI_type_node, + /* APPLE LOCAL end radar 4469713 */ + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", + ftype, IX86_BUILTIN_VEC_EXT_V8HI); + + ftype = build_function_type_list (intHI_type_node, V4HI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", + ftype, IX86_BUILTIN_VEC_EXT_V4HI); + + ftype = build_function_type_list (intSI_type_node, V2SI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si", + ftype, IX86_BUILTIN_VEC_EXT_V2SI); + + ftype = build_function_type_list (intQI_type_node, V16QI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + /* Access to the vec_set patterns. */ + ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, + intDI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI); + + ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, + float_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF); + + ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, + intSI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI); + /* APPLE LOCAL end 5612787 mainline sse4 */ + + ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, + intHI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi", + ftype, IX86_BUILTIN_VEC_SET_V8HI); + + ftype = build_function_type_list (V4HI_type_node, V4HI_type_node, + intHI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi", + ftype, IX86_BUILTIN_VEC_SET_V4HI); + /* APPLE LOCAL begin 5612787 mainline sse4 */ + ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, + intQI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI); + /* APPLE LOCAL end 5612787 mainline sse4 */ +} + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x == const0_rtx) + x = CONST0_RTX (mode); + return x; +} + +/* APPLE LOCAL begin 5612787 mainline sse4 */ +/* Subroutine of ix86_expand_builtin to take care of SSE insns with + 4 operands. The third argument must be a constant smaller than 8 + bits or xmm0. 
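*/

/* Editorial sketch, not part of the patch: the vec_set/vec_ext patterns
   registered above, pinsrw/pextrw with an immediate lane index, plus the
   radar 4469713 point that pextrw zero-extends, so an all-ones lane
   comes back as 65535 rather than -1.  From <emmintrin.h>, compile
   with -msse2.  */
#include <emmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128i v = _mm_set1_epi16 (-1);
  v = _mm_insert_epi16 (v, 42, 5);       /* write lane 5 */
  printf ("%d %d\n",
          _mm_extract_epi16 (v, 5),      /* 42 */
          _mm_extract_epi16 (v, 3));     /* 65535, not -1 */
  return 0;
}

/*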
*/ + +static rtx +ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = TREE_VALUE (exp); + tree arg1 = TREE_VALUE (TREE_CHAIN (exp)); + tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp))); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + enum machine_mode mode2 = insn_data[icode].operand[2].mode; + enum machine_mode mode3 = insn_data[icode].operand[3].mode; + + if (VECTOR_MODE_P (mode1)) + op0 = safe_vector_operand (op0, mode1); + if (VECTOR_MODE_P (mode2)) + op1 = safe_vector_operand (op1, mode2); + if (VECTOR_MODE_P (mode3)) + op2 = safe_vector_operand (op2, mode3); + + if (optimize + || target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if ((optimize && !register_operand (op1, mode2)) + || !(*insn_data[icode].operand[2].predicate) (op1, mode2)) + op1 = copy_to_mode_reg (mode2, op1); + + if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) + switch (icode) + { + case CODE_FOR_sse4_1_blendvpd: + case CODE_FOR_sse4_1_blendvps: + case CODE_FOR_sse4_1_pblendvb: + op2 = copy_to_mode_reg (mode3, op2); + break; + + case CODE_FOR_sse4_1_roundsd: + case CODE_FOR_sse4_1_roundss: + error ("the third argument must be a 4-bit immediate"); + return const0_rtx; + + default: + error ("the third argument must be an 8-bit immediate"); + return const0_rtx; + } + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of crc32 insns. */ + +static rtx +ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target) +{ + rtx pat; + tree arg0 = TREE_VALUE (exp); + tree arg1 = TREE_VALUE (TREE_CHAIN (exp)); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (optimize + || !target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0); + } + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} +/* APPLE LOCAL end 5612787 mainline sse4 */ + +/* Subroutine of ix86_expand_builtin to take care of binop insns. 
*/ + +static rtx +ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) +{ + rtx pat, xops[3]; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (optimize || !target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (GET_MODE (op1) == SImode && mode1 == TImode) + { + rtx x = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_loadd (x, op1)); + op1 = gen_lowpart (TImode, x); + } + + /* The insn must want input operands in the same modes as the + result. */ + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); + + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + /* ??? Using ix86_fixup_binary_operands is problematic when + we've got mismatched modes. Fake it. */ + + xops[0] = target; + xops[1] = op0; + xops[2] = op1; + + if (tmode == mode0 && tmode == mode1) + { + target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops); + op0 = xops[1]; + op1 = xops[2]; + } + else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops)) + { + op0 = force_reg (mode0, op0); + op1 = force_reg (mode1, op1); + target = gen_reg_rtx (tmode); + } + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of stores. */ + +static rtx +ix86_expand_store_builtin (enum insn_code icode, tree arglist) +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode mode0 = insn_data[icode].operand[0].mode; + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (op0, op1); + if (pat) + emit_insn (pat); + return 0; +} + +/* Subroutine of ix86_expand_builtin to take care of unop insns. */ + +static rtx +ix86_expand_unop_builtin (enum insn_code icode, tree arglist, + rtx target, int do_load) +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + rtx op0 = expand_normal (arg0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (optimize || !target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + if (do_load) + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + else + { + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if ((optimize && !register_operand (op0, mode0)) + || ! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + } + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + switch (icode) + { + case CODE_FOR_sse4_1_roundpd: + case CODE_FOR_sse4_1_roundps: + { + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op1 = expand_normal (arg1); + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + error ("the second argument must be a 4-bit immediate"); + return const0_rtx; + } + pat = GEN_FCN (icode) (target, op0, op1); + } + break; + default: + pat = GEN_FCN (icode) (target, op0); + break; + } + /* APPLE LOCAL end 5612787 mainline sse4 */ + + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of three special unop insns: + sqrtss, rsqrtss, rcpss. */ + +static rtx +ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target) +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + rtx op1, op0 = expand_normal (arg0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (optimize || !target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if ((optimize && !register_operand (op0, mode0)) + || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + op1 = op0; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) + op1 = copy_to_mode_reg (mode0, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ + +static rtx +ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, + rtx target) +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2; + enum machine_mode tmode = insn_data[d->icode].operand[0].mode; + enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; + enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; + enum rtx_code comparison = d->comparison; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + /* Swap operands if we have a comparison that isn't available in + hardware. */ + if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) + { + rtx tmp = gen_reg_rtx (mode1); + emit_move_insn (tmp, op1); + op1 = op0; + op0 = tmp; + } + + if (optimize || !target + || GET_MODE (target) != tmode + || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if ((optimize && !register_operand (op0, mode0)) + || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); + pat = GEN_FCN (d->icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of comi insns. 
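*/

/* Editorial sketch, not part of the patch: what the mask-generating
   compares expanded above produce at the user level.  Each lane becomes
   all-ones or all-zeros, and movmskps condenses the lane sign bits into
   an integer.  From <xmmintrin.h>, compile with -msse.  */
#include <xmmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128 a = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
  __m128 b = _mm_set_ps (0.0f, 9.0f, 0.0f, 9.0f);
  __m128 m = _mm_cmplt_ps (a, b);          /* lanes 0 and 2 are true */
  printf ("0x%x\n", _mm_movemask_ps (m));  /* 0x5 */
  return 0;
}

/*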
*/ + +static rtx +ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, + rtx target) +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2; + enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; + enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; + enum rtx_code comparison = d->comparison; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + /* Swap operands if we have a comparison that isn't available in + hardware. */ + if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) + { + rtx tmp = op1; + op1 = op0; + op0 = tmp; + } + + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + if ((optimize && !register_operand (op0, mode0)) + || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); + pat = GEN_FCN (d->icode) (op0, op1); + if (! pat) + return 0; + emit_insn (pat); + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (comparison, QImode, + SET_DEST (pat), + const0_rtx))); + + return SUBREG_REG (target); +} + +/* APPLE LOCAL begin 4299257 */ +/* Subroutine of ix86_expand_builtin to take care of ucomi insns. */ + +static rtx +ix86_expand_sse_ucomi (const struct builtin_description *d, tree arglist, + rtx target) +{ + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; + enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; + enum machine_mode scalar_mode; + enum rtx_code comparison = d->comparison; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + /* Swap operands if we have a comparison that isn't available in + hardware. */ + if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) + { + rtx tmp = op1; + op1 = op0; + op0 = tmp; + } + + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + gcc_assert (mode0 == V4SFmode || mode0 == V2DFmode); + gcc_assert (mode1 == V4SFmode || mode1 == V2DFmode); + + scalar_mode = (mode0 == V4SFmode) ? SFmode : DFmode; + op0 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode0, op0), 0); + op1 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode1, op1), 0); + + ix86_compare_op0 = op0; + ix86_compare_op1 = op1; + if (ix86_expand_setcc (comparison, target)) + return SUBREG_REG (target); + + return NULL_RTX; +} +/* APPLE LOCAL end 4299257 */ + +/* APPLE LOCAL begin 5612787 mainline sse4 */ +/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
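+   Roughly: PTEST sets ZF from the AND of its operands and CF from the
+   ANDN, and the SSE4.1 intrinsics (_mm_testz_si128 and friends) want
+   just one of those flags, so as in the comi case we materialize the
+   result with a setcc into the low byte of a zeroed SImode pseudo;
+   d->comparison selects which flag is tested.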
*/
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+                       rtx target)
+{
+  rtx pat;
+  tree arg0 = TREE_VALUE (exp);
+  tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+  enum rtx_code comparison = d->comparison;
+
+  if (VECTOR_MODE_P (mode0))
+    op0 = safe_vector_operand (op0, mode0);
+  if (VECTOR_MODE_P (mode1))
+    op1 = safe_vector_operand (op1, mode1);
+
+  target = gen_reg_rtx (SImode);
+  emit_move_insn (target, const0_rtx);
+  target = gen_rtx_SUBREG (QImode, target, 0);
+
+  if ((optimize && !register_operand (op0, mode0))
+      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+  if ((optimize && !register_operand (op1, mode1))
+      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+    op1 = copy_to_mode_reg (mode1, op1);
+
+  pat = GEN_FCN (d->icode) (op0, op1);
+  if (! pat)
+    return 0;
+  emit_insn (pat);
+  emit_insn (gen_rtx_SET (VOIDmode,
+                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+                          gen_rtx_fmt_ee (comparison, QImode,
+                                          SET_DEST (pat),
+                                          const0_rtx)));
+
+  return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+                          tree exp, rtx target)
+{
+  rtx pat;
+  tree arg0 = TREE_VALUE (exp);
+  tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+  tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+  tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp))));
+  tree arg4 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp)))));
+  rtx scratch0, scratch1;
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+  rtx op2 = expand_normal (arg2);
+  rtx op3 = expand_normal (arg3);
+  rtx op4 = expand_normal (arg4);
+  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+  tmode0 = insn_data[d->icode].operand[0].mode;
+  tmode1 = insn_data[d->icode].operand[1].mode;
+  modev2 = insn_data[d->icode].operand[2].mode;
+  modei3 = insn_data[d->icode].operand[3].mode;
+  modev4 = insn_data[d->icode].operand[4].mode;
+  modei5 = insn_data[d->icode].operand[5].mode;
+  modeimm = insn_data[d->icode].operand[6].mode;
+
+  if (VECTOR_MODE_P (modev2))
+    op0 = safe_vector_operand (op0, modev2);
+  if (VECTOR_MODE_P (modev4))
+    op2 = safe_vector_operand (op2, modev4);
+
+  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+    op0 = copy_to_mode_reg (modev2, op0);
+  if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+    op1 = copy_to_mode_reg (modei3, op1);
+  if ((optimize && !register_operand (op2, modev4))
+      || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+    op2 = copy_to_mode_reg (modev4, op2);
+  if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+    op3 = copy_to_mode_reg (modei5, op3);
+
+  if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+    {
+      error ("the fifth argument must be an 8-bit immediate");
+      return const0_rtx;
+    }
+
+  if (d->code == IX86_BUILTIN_PCMPESTRI128)
+    {
+      if (optimize || !target
+          || GET_MODE (target) != tmode0
+          || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+        target = gen_reg_rtx (tmode0);
+
+      scratch1 = gen_reg_rtx (tmode1);
+
+      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+    }
+  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+    {
+      if (optimize || !target
+          || GET_MODE (target) != tmode1
+          || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+        target = gen_reg_rtx (tmode1);
+
+      scratch0 = gen_reg_rtx (tmode0);
+
+      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+    }
+  else
+    {
+      gcc_assert (d->flag);
+
+      scratch0 = gen_reg_rtx (tmode0);
+      scratch1 = gen_reg_rtx (tmode1);
+
+      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+    }
+
+  if (! pat)
+    return 0;
+
+  emit_insn (pat);
+
+  if (d->flag)
+    {
+      target = gen_reg_rtx (SImode);
+      emit_move_insn (target, const0_rtx);
+      target = gen_rtx_SUBREG (QImode, target, 0);
+
+      emit_insn
+        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+                      gen_rtx_fmt_ee (EQ, QImode,
+                                      gen_rtx_REG ((enum machine_mode) d->flag,
+                                                   FLAGS_REG),
+                                      const0_rtx)));
+      return SUBREG_REG (target);
+    }
+  else
+    return target;
+}
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+                          tree exp, rtx target)
+{
+  rtx pat;
+  tree arg0 = TREE_VALUE (exp);
+  tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+  tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+  rtx scratch0, scratch1;
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+  rtx op2 = expand_normal (arg2);
+  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+  tmode0 = insn_data[d->icode].operand[0].mode;
+  tmode1 = insn_data[d->icode].operand[1].mode;
+  modev2 = insn_data[d->icode].operand[2].mode;
+  modev3 = insn_data[d->icode].operand[3].mode;
+  modeimm = insn_data[d->icode].operand[4].mode;
+
+  if (VECTOR_MODE_P (modev2))
+    op0 = safe_vector_operand (op0, modev2);
+  if (VECTOR_MODE_P (modev3))
+    op1 = safe_vector_operand (op1, modev3);
+
+  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+    op0 = copy_to_mode_reg (modev2, op0);
+  if ((optimize && !register_operand (op1, modev3))
+      || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+    op1 = copy_to_mode_reg (modev3, op1);
+
+  if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+    {
+      error ("the third argument must be an 8-bit immediate");
+      return const0_rtx;
+    }
+
+  if (d->code == IX86_BUILTIN_PCMPISTRI128)
+    {
+      if (optimize || !target
+          || GET_MODE (target) != tmode0
+          || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+        target = gen_reg_rtx (tmode0);
+
+      scratch1 = gen_reg_rtx (tmode1);
+
+      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+    }
+  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+    {
+      if (optimize || !target
+          || GET_MODE (target) != tmode1
+          || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+        target = gen_reg_rtx (tmode1);
+
+      scratch0 = gen_reg_rtx (tmode0);
+
+      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+    }
+  else
+    {
+      gcc_assert (d->flag);
+
+      scratch0 = gen_reg_rtx (tmode0);
+      scratch1 = gen_reg_rtx (tmode1);
+
+      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+    }
+
+  if (!
pat) + return 0; + + emit_insn (pat); + + if (d->flag) + { + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + emit_insn + (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG ((enum machine_mode) d->flag, + FLAGS_REG), + const0_rtx))); + return SUBREG_REG (target); + } + else + return target; +} +/* APPLE LOCAL end 5612787 mainline sse4 */ + +/* Return the integer constant in ARG. Constrain it to be in the range + of the subparts of VEC_TYPE; issue an error if not. */ + +static int +get_element_number (tree vec_type, tree arg) +{ + unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; + + if (!host_integerp (arg, 1) + || (elt = tree_low_cst (arg, 1), elt > max)) + { + error ("selector must be an integer constant in the range 0..%wi", max); + return 0; + } + + return elt; +} + +/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around + ix86_expand_vector_init. We DO have language-level syntax for this, in + the form of (type){ init-list }. Except that since we can't place emms + instructions from inside the compiler, we can't allow the use of MMX + registers unless the user explicitly asks for it. So we do *not* define + vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead + we have builtins invoked by mmintrin.h that gives us license to emit + these sorts of instructions. */ + +static rtx +ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target) +{ + enum machine_mode tmode = TYPE_MODE (type); + enum machine_mode inner_mode = GET_MODE_INNER (tmode); + int i, n_elt = GET_MODE_NUNITS (tmode); + rtvec v = rtvec_alloc (n_elt); + + gcc_assert (VECTOR_MODE_P (tmode)); + + for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist)) + { + rtx x = expand_normal (TREE_VALUE (arglist)); + RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); + } + + gcc_assert (arglist == NULL); + + if (!target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); + return target; +} + +/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around + ix86_expand_vector_extract. They would be redundant (for non-MMX) if we + had a language-level syntax for referencing vector elements. */ + +static rtx +ix86_expand_vec_ext_builtin (tree arglist, rtx target) +{ + enum machine_mode tmode, mode0; + tree arg0, arg1; + int elt; + rtx op0; + + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + + op0 = expand_normal (arg0); + elt = get_element_number (TREE_TYPE (arg0), arg1); + + tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + mode0 = TYPE_MODE (TREE_TYPE (arg0)); + gcc_assert (VECTOR_MODE_P (mode0)); + + op0 = force_reg (mode0, op0); + + if (optimize || !target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + ix86_expand_vector_extract (true, target, op0, elt); + + return target; +} + +/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around + ix86_expand_vector_set. They would be redundant (for non-MMX) if we had + a language-level syntax for referencing vector elements. 
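+   For illustration (index 0 and the operand names chosen arbitrarily),
+   the V4HI variant is what _mm_insert_pi16 expands through:
+
+     __m64 v2 = (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) v, x, 0);
+
+   Note that the builtin yields a fresh vector rather than updating V
+   in place; the expander below copies OP0 into a new register first.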
*/ + +static rtx +ix86_expand_vec_set_builtin (tree arglist) +{ + enum machine_mode tmode, mode1; + tree arg0, arg1, arg2; + int elt; + rtx op0, op1, target; + + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + + tmode = TYPE_MODE (TREE_TYPE (arg0)); + mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + gcc_assert (VECTOR_MODE_P (tmode)); + + op0 = expand_expr (arg0, NULL_RTX, tmode, 0); + op1 = expand_expr (arg1, NULL_RTX, mode1, 0); + elt = get_element_number (TREE_TYPE (arg0), arg2); + + if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) + op1 = convert_modes (mode1, GET_MODE (op1), op1, true); + + op0 = force_reg (tmode, op0); + op1 = force_reg (mode1, op1); + + /* OP0 is the source of these builtin functions and shouldn't be + modified. Create a copy, use it and return it as target. */ + target = gen_reg_rtx (tmode); + emit_move_insn (target, op0); + ix86_expand_vector_set (true, target, op1, elt); + + return target; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + const struct builtin_description *d; + size_t i; + enum insn_code icode; + tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); + tree arglist = TREE_OPERAND (exp, 1); + /* APPLE LOCAL begin 5612787 mainline sse4 */ + tree arg0, arg1, arg2, arg3; + rtx op0, op1, op2, op3, pat; + /* APPLE LOCAL ssse3 */ + enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4; + /* APPLE LOCAL end 5612787 mainline sse4 */ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + switch (fcode) + { + case IX86_BUILTIN_EMMS: + emit_insn (gen_mmx_emms ()); + return 0; + + case IX86_BUILTIN_SFENCE: + emit_insn (gen_sse_sfence ()); + return 0; + + case IX86_BUILTIN_MASKMOVQ: + case IX86_BUILTIN_MASKMOVDQU: + icode = (fcode == IX86_BUILTIN_MASKMOVQ + ? CODE_FOR_mmx_maskmovq + : CODE_FOR_sse2_maskmovdqu); + /* Note the arg order is different from the operand order. */ + arg1 = TREE_VALUE (arglist); + arg2 = TREE_VALUE (TREE_CHAIN (arglist)); + arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + op0 = force_reg (Pmode, op0); + op0 = gen_rtx_MEM (mode1, op0); + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + pat = GEN_FCN (icode) (op0, op1, op2); + if (! 
pat) + return 0; + emit_insn (pat); + return 0; + + case IX86_BUILTIN_SQRTSS: + return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target); + case IX86_BUILTIN_RSQRTSS: + return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target); + case IX86_BUILTIN_RCPSS: + return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target); + + /* APPLE LOCAL begin 4099020 */ + case IX86_BUILTIN_LOADQ: + return ix86_expand_unop_builtin (CODE_FOR_sse_loadqv4si, arglist, target, 1); + + case IX86_BUILTIN_MOVQ: + return ix86_expand_unop_builtin (CODE_FOR_sse_movqv4si, arglist, target, 0); + + case IX86_BUILTIN_STOREQ: + return ix86_expand_store_builtin (CODE_FOR_sse_storeqv4si, arglist); + /* APPLE LOCAL end 4099020 */ + + case IX86_BUILTIN_LOADUPS: + return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); + + case IX86_BUILTIN_STOREUPS: + return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); + + case IX86_BUILTIN_LOADHPS: + case IX86_BUILTIN_LOADLPS: + case IX86_BUILTIN_LOADHPD: + case IX86_BUILTIN_LOADLPD: + icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps + : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps + : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd + : CODE_FOR_sse2_loadlpd); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + op0 = force_reg (mode0, op0); + op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); + if (optimize || target == 0 + || GET_MODE (target) != tmode + || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_STOREHPS: + case IX86_BUILTIN_STORELPS: + icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps + : CODE_FOR_sse_storelps); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + op1 = force_reg (mode1, op1); + + pat = GEN_FCN (icode) (op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return const0_rtx; + + case IX86_BUILTIN_MOVNTPS: + return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); + case IX86_BUILTIN_MOVNTQ: + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return ix86_expand_store_builtin (CODE_FOR_sse_movntv1di, arglist); + + case IX86_BUILTIN_LDMXCSR: + op0 = expand_normal (TREE_VALUE (arglist)); + target = assign_386_stack_local (SImode, SLOT_VIRTUAL); + emit_move_insn (target, op0); + emit_insn (gen_sse_ldmxcsr (target)); + return 0; + + case IX86_BUILTIN_STMXCSR: + target = assign_386_stack_local (SImode, SLOT_VIRTUAL); + emit_insn (gen_sse_stmxcsr (target)); + return copy_to_mode_reg (SImode, target); + + case IX86_BUILTIN_SHUFPS: + case IX86_BUILTIN_SHUFPD: + icode = (fcode == IX86_BUILTIN_SHUFPS + ? 
CODE_FOR_sse_shufps + : CODE_FOR_sse2_shufpd); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + { + /* @@@ better error message */ + error ("mask must be an immediate"); + return gen_reg_rtx (tmode); + } + if (optimize || target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_PSHUFW: + case IX86_BUILTIN_PSHUFD: + case IX86_BUILTIN_PSHUFHW: + case IX86_BUILTIN_PSHUFLW: + icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw + : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw + : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd + : CODE_FOR_mmx_pshufw); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + /* @@@ better error message */ + error ("mask must be an immediate"); + return const0_rtx; + } + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_PSLLWI128: + icode = CODE_FOR_ashlv8hi3; + goto do_pshifti; + case IX86_BUILTIN_PSLLDI128: + icode = CODE_FOR_ashlv4si3; + goto do_pshifti; + case IX86_BUILTIN_PSLLQI128: + icode = CODE_FOR_ashlv2di3; + goto do_pshifti; + case IX86_BUILTIN_PSRAWI128: + icode = CODE_FOR_ashrv8hi3; + goto do_pshifti; + case IX86_BUILTIN_PSRADI128: + icode = CODE_FOR_ashrv4si3; + goto do_pshifti; + case IX86_BUILTIN_PSRLWI128: + icode = CODE_FOR_lshrv8hi3; + goto do_pshifti; + case IX86_BUILTIN_PSRLDI128: + icode = CODE_FOR_lshrv4si3; + goto do_pshifti; + case IX86_BUILTIN_PSRLQI128: + icode = CODE_FOR_lshrv2di3; + goto do_pshifti; + do_pshifti: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + + /* APPLE LOCAL begin radar 5543378 mainline candidate */ + if (GET_CODE (op1) == CONST_INT) + { + if (INTVAL (op1) < 0 || INTVAL (op1) > 255) + op1 = GEN_INT (255); + } + else + { + mode2 = insn_data[icode].operand[2].mode; + if (! 
(*insn_data[icode].operand[2].predicate) (op1, mode2))
+        {
+          op1 = copy_to_reg (op1);
+          if (GET_MODE (op1) != mode2)
+            op1 = convert_to_mode (mode2, op1, 0);
+        }
+      }
+      /* APPLE LOCAL end radar 5543378 mainline candidate */
+
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_reg (op0);
+
+      target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (!pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_PSLLW128:
+      icode = CODE_FOR_ashlv8hi3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSLLD128:
+      icode = CODE_FOR_ashlv4si3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSLLQ128:
+      icode = CODE_FOR_ashlv2di3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRAW128:
+      icode = CODE_FOR_ashrv8hi3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRAD128:
+      icode = CODE_FOR_ashrv4si3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRLW128:
+      icode = CODE_FOR_lshrv8hi3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRLD128:
+      icode = CODE_FOR_lshrv4si3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRLQ128:
+      icode = CODE_FOR_lshrv2di3;
+      goto do_pshift;
+    do_pshift:
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_reg (op0);
+
+      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+        op1 = copy_to_reg (op1);
+
+      target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (!pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+
+      /* APPLE LOCAL begin 5919583 */
+    case IX86_BUILTIN_PSLLDQI128:
+    case IX86_BUILTIN_PSRLDQI128:
+    case IX86_BUILTIN_PSLLDQI128_BYTESHIFT:
+    case IX86_BUILTIN_PSRLDQI128_BYTESHIFT:
+      icode = ((fcode == IX86_BUILTIN_PSLLDQI128
+                || fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT)
+               ? CODE_FOR_sse2_ashlti3
+               : CODE_FOR_sse2_lshrti3);
+      /* APPLE LOCAL end 5919583 */
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+
+      /* APPLE LOCAL begin 5919583 */
+      if (! CONST_INT_P (op1))
+        {
+          error ("shift must be an immediate");
+          return const0_rtx;
+        }
+      /* The _mm_srli_si128/_mm_slli_si128 primitives are defined with
+         a byte-shift count; inside of GCC, we prefer to specify the
+         width of a shift in bits.  The original non-BYTESHIFT
+         primitives were problematic due to the "*8" in their macro
+         bodies; we have moved the "*8" here to resolve this.  The
+         original builtins are still supported because many developers
+         rely upon them.  */
+      if (fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT
+          || fcode == IX86_BUILTIN_PSRLDQI128_BYTESHIFT)
+        op1 = gen_rtx_CONST_INT (SImode, INTVAL (op1) * 8);
+      /* APPLE LOCAL end 5919583 */
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        {
+          op0 = copy_to_reg (op0);
+          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+        }
+      if (!
(*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + error ("shift must be an immediate"); + return const0_rtx; + } + target = gen_reg_rtx (V2DImode); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), + op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_FEMMS: + emit_insn (gen_mmx_femms ()); + return NULL_RTX; + + case IX86_BUILTIN_PAVGUSB: + return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target); + + case IX86_BUILTIN_PF2ID: + return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0); + + case IX86_BUILTIN_PFACC: + return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target); + + case IX86_BUILTIN_PFADD: + return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target); + + case IX86_BUILTIN_PFCMPEQ: + return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target); + + case IX86_BUILTIN_PFCMPGE: + return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target); + + case IX86_BUILTIN_PFCMPGT: + return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target); + + case IX86_BUILTIN_PFMAX: + return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target); + + case IX86_BUILTIN_PFMIN: + return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target); + + case IX86_BUILTIN_PFMUL: + return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target); + + case IX86_BUILTIN_PFRCP: + return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0); + + case IX86_BUILTIN_PFRCPIT1: + return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target); + + case IX86_BUILTIN_PFRCPIT2: + return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target); + + case IX86_BUILTIN_PFRSQIT1: + return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target); + + case IX86_BUILTIN_PFRSQRT: + return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0); + + case IX86_BUILTIN_PFSUB: + return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target); + + case IX86_BUILTIN_PFSUBR: + return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target); + + case IX86_BUILTIN_PI2FD: + return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0); + + case IX86_BUILTIN_PMULHRW: + return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target); + + case IX86_BUILTIN_PF2IW: + return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0); + + case IX86_BUILTIN_PFNACC: + return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target); + + case IX86_BUILTIN_PFPNACC: + return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target); + + case IX86_BUILTIN_PI2FW: + return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0); + + case IX86_BUILTIN_PSWAPDSI: + return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0); + + case IX86_BUILTIN_PSWAPDSF: + return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0); + + case IX86_BUILTIN_SQRTSD: + return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target); + case IX86_BUILTIN_LOADUPD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); + case IX86_BUILTIN_STOREUPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); + + case IX86_BUILTIN_MFENCE: + emit_insn (gen_sse2_mfence ()); + return 0; + case IX86_BUILTIN_LFENCE: + emit_insn (gen_sse2_lfence 
()); + return 0; + + case IX86_BUILTIN_CLFLUSH: + arg0 = TREE_VALUE (arglist); + op0 = expand_normal (arg0); + icode = CODE_FOR_sse2_clflush; + if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) + op0 = copy_to_mode_reg (Pmode, op0); + + emit_insn (gen_sse2_clflush (op0)); + return 0; + + case IX86_BUILTIN_MOVNTPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist); + case IX86_BUILTIN_MOVNTDQ: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist); + case IX86_BUILTIN_MOVNTI: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); + + case IX86_BUILTIN_LOADDQU: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1); + case IX86_BUILTIN_STOREDQU: + return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); + + case IX86_BUILTIN_MONITOR: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (Pmode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + if (!TARGET_64BIT) + emit_insn (gen_sse3_monitor (op0, op1, op2)); + else + emit_insn (gen_sse3_monitor64 (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_MWAIT: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + emit_insn (gen_sse3_mwait (op0, op1)); + return 0; + + case IX86_BUILTIN_LDDQU: + return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, + target, 1); + /* APPLE LOCAL begin mainline */ + case IX86_BUILTIN_PALIGNR: + case IX86_BUILTIN_PALIGNR128: + if (fcode == IX86_BUILTIN_PALIGNR) + { + /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ + icode = CODE_FOR_ssse3_palignrv1di; + mode = V1DImode; + /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ + } + else + { + icode = CODE_FOR_ssse3_palignrti; + mode = V2DImode; + } + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); + } + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0); + } + if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) + { + error ("shift must be an immediate"); + return const0_rtx; + } + target = gen_reg_rtx (mode); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0), + op0, op1, op2); + if (! 
pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+
+      /* APPLE LOCAL end mainline */
+      /* APPLE LOCAL begin 5612787 mainline sse4 */
+    case IX86_BUILTIN_MOVNTDQA:
+      return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, arglist,
+                                       target, 1);
+
+    case IX86_BUILTIN_MOVNTSD:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
+
+    case IX86_BUILTIN_MOVNTSS:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
+
+    case IX86_BUILTIN_INSERTQ:
+    case IX86_BUILTIN_EXTRQ:
+      icode = (fcode == IX86_BUILTIN_EXTRQ
+               ? CODE_FOR_sse4a_extrq
+               : CODE_FOR_sse4a_insertq);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        op1 = copy_to_mode_reg (mode2, op1);
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_EXTRQI:
+      icode = CODE_FOR_sse4a_extrqi;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+      op2 = expand_normal (arg2);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      mode3 = insn_data[icode].operand[3].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        {
+          error ("index mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+        {
+          error ("length mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2);
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_INSERTQI:
+      icode = CODE_FOR_sse4a_insertqi;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+      op2 = expand_normal (arg2);
+      op3 = expand_normal (arg3);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      mode3 = insn_data[icode].operand[3].mode;
+      mode4 = insn_data[icode].operand[4].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        op1 = copy_to_mode_reg (mode2, op1);
+
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+        {
+          error ("index mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (!
(*insn_data[icode].operand[4].predicate) (op3, mode4)) + { + error ("length mask must be an immediate"); + return gen_reg_rtx (tmode); + } + if (optimize || target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2, op3); + if (! pat) + return NULL_RTX; + emit_insn (pat); + return target; + /* APPLE LOCAL end 5612787 mainline sse4 */ + + case IX86_BUILTIN_VEC_INIT_V2SI: + case IX86_BUILTIN_VEC_INIT_V4HI: + case IX86_BUILTIN_VEC_INIT_V8QI: + return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); + + case IX86_BUILTIN_VEC_EXT_V2DF: + case IX86_BUILTIN_VEC_EXT_V2DI: + case IX86_BUILTIN_VEC_EXT_V4SF: + case IX86_BUILTIN_VEC_EXT_V4SI: + case IX86_BUILTIN_VEC_EXT_V8HI: + case IX86_BUILTIN_VEC_EXT_V16QI: + case IX86_BUILTIN_VEC_EXT_V2SI: + case IX86_BUILTIN_VEC_EXT_V4HI: + return ix86_expand_vec_ext_builtin (arglist, target); + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + case IX86_BUILTIN_VEC_SET_V2DI: + case IX86_BUILTIN_VEC_SET_V4SF: + case IX86_BUILTIN_VEC_SET_V4SI: + /* APPLE LOCAL end 5612787 mainline sse4 */ + case IX86_BUILTIN_VEC_SET_V8HI: + case IX86_BUILTIN_VEC_SET_V4HI: + /* APPLE LOCAL 5612787 mainline sse4 */ + case IX86_BUILTIN_VEC_SET_V16QI: + return ix86_expand_vec_set_builtin (arglist); + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + case IX86_BUILTIN_INFQ: + { + REAL_VALUE_TYPE inf; + rtx tmp; + + real_inf (&inf); + tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode); + + tmp = validize_mem (force_const_mem (mode, tmp)); + + if (target == 0) + target = gen_reg_rtx (mode); + + emit_move_insn (target, tmp); + return target; + } + + case IX86_BUILTIN_FABSQ: + return ix86_expand_unop_builtin (CODE_FOR_abstf2, arglist, target, 0); + /* APPLE LOCAL end 5612787 mainline sse4 */ + + default: + break; + } + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + for (i = 0, d = bdesc_sse_3arg; + i < ARRAY_SIZE (bdesc_sse_3arg); + i++, d++) + if (d->code == fcode) + return ix86_expand_sse_4_operands_builtin (d->icode, + arglist, + target); + /* APPLE LOCAL end 5612787 mainline sse4 */ + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + { + /* Compares are treated specially. 
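+           (The four maskcmp icodes tested below carry the comparison
+           code as an extra operand, built with gen_rtx_fmt_ee, so they
+           must go through ix86_expand_sse_compare rather than the
+           generic two-operand expander.)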
*/
+        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
+            || d->icode == CODE_FOR_sse2_maskcmpv2df3
+            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
+          return ix86_expand_sse_compare (d, arglist, target);
+
+        return ix86_expand_binop_builtin (d->icode, arglist, target);
+      }
+
+  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
+
+  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_sse_comi (d, arglist, target);
+
+  /* APPLE LOCAL begin 4299257 */
+  for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_sse_ucomi (d, arglist, target);
+  /* APPLE LOCAL end 4299257 */
+
+  /* APPLE LOCAL begin 5612787 mainline sse4 */
+  for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_sse_ptest (d, arglist, target);
+
+  for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_crc32 (d->icode, arglist, target);
+
+  for (i = 0, d = bdesc_pcmpestr;
+       i < ARRAY_SIZE (bdesc_pcmpestr);
+       i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_sse_pcmpestr (d, arglist, target);
+
+  for (i = 0, d = bdesc_pcmpistr;
+       i < ARRAY_SIZE (bdesc_pcmpistr);
+       i++, d++)
+    if (d->code == fcode)
+      return ix86_expand_sse_pcmpistr (d, arglist, target);
+  /* APPLE LOCAL end 5612787 mainline sse4 */
+
+  gcc_unreachable ();
+}
+
+/* Store OPERAND to memory after reload has completed.  This means
+   that we can't easily use assign_stack_local.  */
+rtx
+ix86_force_to_memory (enum machine_mode mode, rtx operand)
+{
+  rtx result;
+
+  gcc_assert (reload_completed);
+  if (TARGET_RED_ZONE)
+    {
+      result = gen_rtx_MEM (mode,
+                            gen_rtx_PLUS (Pmode,
+                                          stack_pointer_rtx,
+                                          GEN_INT (-RED_ZONE_SIZE)));
+      emit_move_insn (result, operand);
+    }
+  else if (!TARGET_RED_ZONE && TARGET_64BIT)
+    {
+      switch (mode)
+        {
+        case HImode:
+        case SImode:
+          operand = gen_lowpart (DImode, operand);
+          /* FALLTHRU */
+        case DImode:
+          emit_insn (
+                      gen_rtx_SET (VOIDmode,
+                                   gen_rtx_MEM (DImode,
+                                                gen_rtx_PRE_DEC (DImode,
+                                                                 stack_pointer_rtx)),
+                                   operand));
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      result = gen_rtx_MEM (mode, stack_pointer_rtx);
+    }
+  else
+    {
+      switch (mode)
+        {
+        case DImode:
+          {
+            rtx operands[2];
+            split_di (&operand, 1, operands, operands + 1);
+            emit_insn (
+                        gen_rtx_SET (VOIDmode,
+                                     gen_rtx_MEM (SImode,
+                                                  gen_rtx_PRE_DEC (Pmode,
+                                                                   stack_pointer_rtx)),
+                                     operands[1]));
+            emit_insn (
+                        gen_rtx_SET (VOIDmode,
+                                     gen_rtx_MEM (SImode,
+                                                  gen_rtx_PRE_DEC (Pmode,
+                                                                   stack_pointer_rtx)),
+                                     operands[0]));
+          }
+          break;
+        case HImode:
+          /* Store HImodes as SImodes.  */
+          operand = gen_lowpart (SImode, operand);
+          /* FALLTHRU */
+        case SImode:
+          emit_insn (
+                      gen_rtx_SET (VOIDmode,
+                                   gen_rtx_MEM (GET_MODE (operand),
+                                                gen_rtx_PRE_DEC (SImode,
+                                                                 stack_pointer_rtx)),
+                                   operand));
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      result = gen_rtx_MEM (mode, stack_pointer_rtx);
+    }
+  return result;
+}
+
+/* Free the operand from memory.  */
+void
+ix86_free_from_memory (enum machine_mode mode)
+{
+  if (!TARGET_RED_ZONE)
+    {
+      int size;
+
+      if (mode == DImode || TARGET_64BIT)
+        size = 8;
+      else
+        size = 4;
+      /* Use LEA to deallocate stack space.  In peephole2 it will be
+         converted to a pop or add instruction if registers are available.
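+         Concretely we emit
+           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int SIZE)))
+         which prints as e.g. "leal 4(%esp), %esp" so the flags are
+         left untouched; peephole2 may turn it into "addl $4, %esp" or
+         a pop when that is safe.  (Illustrative operand values.)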
*/ + emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (size)))); + } +} + +/* Put float CONST_DOUBLE in the constant pool instead of fp regs. + QImode must go into class Q_REGS. + Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and + movdf to do mem-to-mem moves through integer regs. */ +enum reg_class +ix86_preferred_reload_class (rtx x, enum reg_class class) +{ + enum machine_mode mode = GET_MODE (x); + + /* We're only allowed to return a subclass of CLASS. Many of the + following checks fail for NO_REGS, so eliminate that early. */ + if (class == NO_REGS) + return NO_REGS; + + /* All classes can load zeros. */ + if (x == CONST0_RTX (mode)) + return class; + + /* Force constants into memory if we are loading a (nonzero) constant into + an MMX or SSE register. This is because there are no MMX/SSE instructions + to load from a constant. */ + if (CONSTANT_P (x) + && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))) + return NO_REGS; + + /* APPLE LOCAL begin */ + /* MERGE FIXME - ensure that 3501055 is fixed. */ + /* MERGE FIXME - ensure that 4206991 is fixed. */ + /* APPLE LOCAL end */ + /* Prefer SSE regs only, if we can use them for math. */ + if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) + return SSE_CLASS_P (class) ? class : NO_REGS; + + /* Floating-point constants need more complex checks. */ + if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) + { + /* General regs can load everything. */ + if (reg_class_subset_p (class, GENERAL_REGS)) + return class; + + /* Floats can load 0 and 1 plus some others. Note that we eliminated + zero above. We only want to wind up preferring 80387 registers if + we plan on doing computation with them. */ + if (TARGET_80387 + && standard_80387_constant_p (x)) + { + /* Limit class to non-sse. */ + if (class == FLOAT_SSE_REGS) + return FLOAT_REGS; + if (class == FP_TOP_SSE_REGS) + return FP_TOP_REG; + if (class == FP_SECOND_SSE_REGS) + return FP_SECOND_REG; + if (class == FLOAT_INT_REGS || class == FLOAT_REGS) + return class; + } + + return NO_REGS; + } + + /* Generally when we see PLUS here, it's the function invariant + (plus soft-fp const_int). Which can only be computed into general + regs. */ + if (GET_CODE (x) == PLUS) + return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS; + + /* QImode constants are easy to load, but non-constant QImode data + must go into Q_REGS. */ + if (GET_MODE (x) == QImode && !CONSTANT_P (x)) + { + if (reg_class_subset_p (class, Q_REGS)) + return class; + if (reg_class_subset_p (Q_REGS, class)) + return Q_REGS; + return NO_REGS; + } + + return class; +} + +/* Discourage putting floating-point values in SSE registers unless + SSE math is being used, and likewise for the 387 registers. */ +enum reg_class +ix86_preferred_output_reload_class (rtx x, enum reg_class class) +{ + enum machine_mode mode = GET_MODE (x); + + /* Restrict the output reload class to the register bank that we are doing + math on. If we would like not to return a subset of CLASS, reject this + alternative: if reload cannot do this, it will still use its choice. */ + mode = GET_MODE (x); + if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) + return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS; + + if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode)) + { + if (class == FP_TOP_SSE_REGS) + return FP_TOP_REG; + else if (class == FP_SECOND_SSE_REGS) + return FP_SECOND_REG; + else + return FLOAT_CLASS_P (class) ? 
class : NO_REGS;
+    }
+
+  return class;
+}
+
+/* If we are copying between general and FP registers, we need a memory
+   location.  The same is true for SSE and MMX registers.
+
+   The macro can't work reliably when one of the CLASSES is a class
+   containing registers from multiple units (SSE, MMX, integer).  We avoid
+   this by never combining those units in a single alternative in the
+   machine description.  Ensure that this constraint holds to avoid
+   unexpected surprises.
+
+   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
+   enforce these sanity checks.  */
+
+int
+ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+                              enum machine_mode mode, int strict)
+{
+  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
+      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
+      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
+      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
+      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
+      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
+    {
+      gcc_assert (!strict);
+      return true;
+    }
+
+  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
+    return true;
+
+  /* ??? This is a lie.  We do have moves between mmx/general, and for
+     mmx/sse2.  But by saying we need secondary memory we discourage the
+     register allocator from using the mmx registers unless needed.  */
+  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
+    return true;
+
+  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+    {
+      /* SSE1 doesn't have any direct moves from other classes.  */
+      if (!TARGET_SSE2)
+        return true;
+
+      /* If the target says that inter-unit moves are more expensive
+         than moving through memory, then don't generate them.  */
+      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
+        return true;
+
+      /* Between SSE and general, we have moves no larger than word size.  */
+      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+        return true;
+
+      /* ??? For the cost of one register reformat penalty, we could use
+         the same instructions to move SFmode and DFmode data, but the
+         relevant move patterns don't support those alternatives.  */
+      if (mode == SFmode || mode == DFmode)
+        return true;
+    }
+
+  return false;
+}
+
+/* Return true if the registers in CLASS cannot represent the change from
+   modes FROM to TO.  */
+
+bool
+ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+                               enum reg_class class)
+{
+  if (from == to)
+    return false;
+
+  /* x87 registers can't do subreg at all, as all values are reformatted
+     to extended precision.  */
+  if (MAYBE_FLOAT_CLASS_P (class))
+    return true;
+
+  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
+    {
+      /* Vector registers do not support QI or HImode loads.  If we don't
+         disallow a change to these modes, reload will assume it's ok to
+         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
+         the vec_dupv4hi pattern.  */
+      if (GET_MODE_SIZE (from) < 4)
+        return true;
+
+      /* Vector registers do not support subreg with nonzero offsets, which
+         are otherwise valid for integer registers.  Since we can't see
+         whether we have a nonzero offset from here, prohibit all
+         nonparadoxical subregs changing size.  */
+      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+        return true;
+    }
+
+  return false;
+}
+
+/* Return the cost of moving data from a register in class CLASS1 to
+   one in class CLASS2.
+
+   It is not required that the cost always equal 2 when FROM is the same as TO;
+   on some machines it is expensive to move between registers if they are not
+   general registers.  */
+
+int
+ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
+                         enum reg_class class2)
+{
+  /* In case we require secondary memory, compute the cost of the store
+     followed by the load.  In order to avoid bad register allocation
+     choices, we need this to be *at least* as high as the symmetric
+     MEMORY_MOVE_COST.  */
+
+  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
+    {
+      int cost = 1;
+
+      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
+                   MEMORY_MOVE_COST (mode, class1, 1));
+      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
+                   MEMORY_MOVE_COST (mode, class2, 1));
+
+      /* When copying from a general purpose register we may emit multiple
+         stores followed by a single load, causing a memory size mismatch
+         stall.  Count this as an arbitrarily high cost of 20.  */
+      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+        cost += 20;
+
+      /* In the case of FP/MMX moves, the registers actually overlap, and we
+         have to switch modes in order to treat them differently.  */
+      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
+          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
+        cost += 20;
+
+      return cost;
+    }
+
+  /* Moves between SSE/MMX and integer unit are expensive.  */
+  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
+      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+    return ix86_cost->mmxsse_to_integer;
+  if (MAYBE_FLOAT_CLASS_P (class1))
+    return ix86_cost->fp_move;
+  if (MAYBE_SSE_CLASS_P (class1))
+    return ix86_cost->sse_move;
+  if (MAYBE_MMX_CLASS_P (class1))
+    return ix86_cost->mmx_move;
+  return 2;
+}
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
+
+bool
+ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+  /* Flags, and only flags, can hold CCmode values.  */
+  if (CC_REGNO_P (regno))
+    return GET_MODE_CLASS (mode) == MODE_CC;
+  if (GET_MODE_CLASS (mode) == MODE_CC
+      || GET_MODE_CLASS (mode) == MODE_RANDOM
+      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+    return 0;
+  if (FP_REGNO_P (regno))
+    return VALID_FP_MODE_P (mode);
+  if (SSE_REGNO_P (regno))
+    {
+      /* We implement the move patterns for all vector modes into and
+         out of SSE registers, even when no operation instructions
+         are available.  */
+      return (VALID_SSE_REG_MODE (mode)
+              || VALID_SSE2_REG_MODE (mode)
+              || VALID_MMX_REG_MODE (mode)
+              || VALID_MMX_REG_MODE_3DNOW (mode));
+    }
+  if (MMX_REGNO_P (regno))
+    {
+      /* We implement the move patterns for 3DNOW modes even in MMX mode,
+         so if the register is available at all, then we can move data of
+         the given mode into or out of it.  */
+      return (VALID_MMX_REG_MODE (mode)
+              || VALID_MMX_REG_MODE_3DNOW (mode));
+    }
+
+  if (mode == QImode)
+    {
+      /* Take care with QImode values - they can live in non-QI regs,
+         but they then cause partial register stalls.  */
+      if (regno < 4 || TARGET_64BIT)
+        return 1;
+      if (!TARGET_PARTIAL_REG_STALL)
+        return 1;
+      return reload_in_progress || reload_completed;
+    }
+  /* We handle both integers and floats in the general purpose registers.  */
+  else if (VALID_INT_MODE_P (mode))
+    return 1;
+  else if (VALID_FP_MODE_P (mode))
+    return 1;
+  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
+     on to use that value in smaller contexts, this can easily force a
+     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
+     supporting DImode, allow it.
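+     (For instance, an __m64 value punned to a 64-bit integer through
+     a union yields a DImode pseudo; refusing DImode here would push
+     every such access through memory.  Illustrative scenario only.)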
*/
+  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
+    return 1;
+
+  return 0;
+}
+
+/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
+   tieable integer mode.  */
+
+static bool
+ix86_tieable_integer_mode_p (enum machine_mode mode)
+{
+  switch (mode)
+    {
+    case HImode:
+    case SImode:
+      return true;
+
+    case QImode:
+      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
+
+    case DImode:
+      /* APPLE LOCAL 5695218 convert int to logical bool */
+      return !!TARGET_64BIT;
+
+    default:
+      return false;
+    }
+}
+
+/* Return true if MODE1 is accessible in a register that can hold MODE2
+   without copying.  That is, all register classes that can hold MODE2
+   can also hold MODE1.  */
+
+bool
+ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+  if (mode1 == mode2)
+    return true;
+
+  if (ix86_tieable_integer_mode_p (mode1)
+      && ix86_tieable_integer_mode_p (mode2))
+    return true;
+
+  /* MODE2 being XFmode implies fp stack or general regs, which means we
+     can tie any smaller floating point modes to it.  Note that we do not
+     tie this with TFmode.  */
+  if (mode2 == XFmode)
+    return mode1 == SFmode || mode1 == DFmode;
+
+  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
+     that we can tie it with SFmode.  */
+  if (mode2 == DFmode)
+    return mode1 == SFmode;
+
+  /* If MODE2 is only appropriate for an SSE register, then tie with
+     any other mode acceptable to SSE registers.  */
+  if (GET_MODE_SIZE (mode2) >= 8
+      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
+
+  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
+     with any other mode acceptable to MMX registers.  */
+  if (GET_MODE_SIZE (mode2) == 8
+      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
+    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
+
+  return false;
+}
+
+/* Return the cost of moving data of mode M between a
+   register and memory.  A value of 2 is the default; this cost is
+   relative to those in `REGISTER_MOVE_COST'.
+
+   If moving between registers and memory is more expensive than
+   between two registers, you should define this macro to express the
+   relative cost.
+
+   Also model the increased cost of moving QImode registers in
+   non-Q_REGS classes.  */
+int
+ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
+{
+  if (FLOAT_CLASS_P (class))
+    {
+      int index;
+      switch (mode)
+        {
+        case SFmode:
+          index = 0;
+          break;
+        case DFmode:
+          index = 1;
+          break;
+        case XFmode:
+          index = 2;
+          break;
+        default:
+          return 100;
+        }
+      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+    }
+  if (SSE_CLASS_P (class))
+    {
+      int index;
+      switch (GET_MODE_SIZE (mode))
+        {
+        case 4:
+          index = 0;
+          break;
+        case 8:
+          index = 1;
+          break;
+        case 16:
+          index = 2;
+          break;
+        default:
+          return 100;
+        }
+      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+    }
+  if (MMX_CLASS_P (class))
+    {
+      int index;
+      switch (GET_MODE_SIZE (mode))
+        {
+        case 4:
+          index = 0;
+          break;
+        case 8:
+          index = 1;
+          break;
+        default:
+          return 100;
+        }
+      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+    }
+  switch (GET_MODE_SIZE (mode))
+    {
+    case 1:
+      if (in)
+        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
+                : ix86_cost->movzbl_load);
+      else
+        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
+                : ix86_cost->int_store[0] + 4);
+      break;
+    case 2:
+      return in ?
ix86_cost->int_load[1] : ix86_cost->int_store[1];
+    default:
+      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
+      if (mode == TFmode)
+        mode = XFmode;
+      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
+              * (((int) GET_MODE_SIZE (mode)
+                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+    }
+}
+
+/* Compute a (partial) cost for rtx X.  Return true if the complete
+   cost has been computed, and false if subexpressions should be
+   scanned.  In either case, *TOTAL contains the cost result.  */
+
+static bool
+ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
+{
+  enum machine_mode mode = GET_MODE (x);
+
+  switch (code)
+    {
+    case CONST_INT:
+    case CONST:
+    case LABEL_REF:
+    case SYMBOL_REF:
+      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
+        *total = 3;
+      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
+        *total = 2;
+      else if (flag_pic && SYMBOLIC_CONST (x)
+               && (!TARGET_64BIT
+                   || (GET_CODE (x) != LABEL_REF
+                       && (GET_CODE (x) != SYMBOL_REF
+                           || !SYMBOL_REF_LOCAL_P (x)))))
+        *total = 1;
+      else
+        *total = 0;
+      return true;
+
+    case CONST_DOUBLE:
+      if (mode == VOIDmode)
+        *total = 0;
+      else
+        switch (standard_80387_constant_p (x))
+          {
+          case 1: /* 0.0 */
+            *total = 1;
+            break;
+          default: /* Other constants */
+            *total = 2;
+            break;
+          case 0:
+          case -1:
+            /* Start with (MEM (SYMBOL_REF)), since that's where
+               it'll probably end up.  Add a penalty for size.  */
+            *total = (COSTS_N_INSNS (1)
+                      + (flag_pic != 0 && !TARGET_64BIT)
+                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
+            break;
+          }
+      return true;
+
+    case ZERO_EXTEND:
+      /* The zero extension is often completely free on x86_64, so make
+         it as cheap as possible.  */
+      if (TARGET_64BIT && mode == DImode
+          && GET_MODE (XEXP (x, 0)) == SImode)
+        *total = 1;
+      else if (TARGET_ZERO_EXTEND_WITH_AND)
+        *total = ix86_cost->add;
+      else
+        *total = ix86_cost->movzx;
+      return false;
+
+    case SIGN_EXTEND:
+      *total = ix86_cost->movsx;
+      return false;
+
+    case ASHIFT:
+      if (GET_CODE (XEXP (x, 1)) == CONST_INT
+          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+        {
+          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+          if (value == 1)
+            {
+              *total = ix86_cost->add;
+              return false;
+            }
+          if ((value == 2 || value == 3)
+              && ix86_cost->lea <= ix86_cost->shift_const)
+            {
+              *total = ix86_cost->lea;
+              return false;
+            }
+        }
+      /* FALLTHRU */
+
+    case ROTATE:
+    case ASHIFTRT:
+    case LSHIFTRT:
+    case ROTATERT:
+      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+        {
+          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+            {
+              if (INTVAL (XEXP (x, 1)) > 32)
+                *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
+              else
+                *total = ix86_cost->shift_const * 2;
+            }
+          else
+            {
+              if (GET_CODE (XEXP (x, 1)) == AND)
+                *total = ix86_cost->shift_var * 2;
+              else
+                *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
+            }
+        }
+      else
+        {
+          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+            *total = ix86_cost->shift_const;
+          else
+            *total = ix86_cost->shift_var;
+        }
+      return false;
+
+    case MULT:
+      if (FLOAT_MODE_P (mode))
+        {
+          *total = ix86_cost->fmul;
+          return false;
+        }
+      else
+        {
+          rtx op0 = XEXP (x, 0);
+          rtx op1 = XEXP (x, 1);
+          int nbits;
+          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+            {
+              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+              for (nbits = 0; value != 0; value &= value - 1)
+                nbits++;
+            }
+          else
+            /* This is arbitrary.  */
+            nbits = 7;
+
+          /* Compute costs correctly for widening multiplication.
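+             That is, a multiply whose operands are extended from the
+             next narrower mode (or one extended operand and a constant
+             that fits that mode), e.g.
+
+               (mult:DI (sign_extend:DI (reg:SI a))
+                        (sign_extend:DI (reg:SI b)))
+
+             Hardware does this with a single widening imul/mul, so we
+             cost the multiply at the narrower mode rather than at MODE.
+             (Illustrative sketch; see is_mulwiden below.)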
*/ + if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) + && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 + == GET_MODE_SIZE (mode)) + { + int is_mulwiden = 0; + enum machine_mode inner_mode = GET_MODE (op0); + + if (GET_CODE (op0) == GET_CODE (op1)) + is_mulwiden = 1, op1 = XEXP (op1, 0); + else if (GET_CODE (op1) == CONST_INT) + { + if (GET_CODE (op0) == SIGN_EXTEND) + is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) + == INTVAL (op1); + else + is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); + } + + if (is_mulwiden) + op0 = XEXP (op0, 0), mode = GET_MODE (op0); + } + + *total = (ix86_cost->mult_init[MODE_INDEX (mode)] + + nbits * ix86_cost->mult_bit + + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); + + return true; + } + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (FLOAT_MODE_P (mode)) + *total = ix86_cost->fdiv; + else + *total = ix86_cost->divide[MODE_INDEX (mode)]; + return false; + + case PLUS: + if (FLOAT_MODE_P (mode)) + *total = ix86_cost->fadd; + else if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) + { + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT + && CONSTANT_P (XEXP (x, 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); + if (val == 2 || val == 4 || val == 8) + { + *total = ix86_cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); + *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + outer_code); + *total += rtx_cost (XEXP (x, 1), outer_code); + return true; + } + } + else if (GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); + if (val == 2 || val == 4 || val == 8) + { + *total = ix86_cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); + *total += rtx_cost (XEXP (x, 1), outer_code); + return true; + } + } + else if (GET_CODE (XEXP (x, 0)) == PLUS) + { + *total = ix86_cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); + *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); + *total += rtx_cost (XEXP (x, 1), outer_code); + return true; + } + } + /* FALLTHRU */ + + case MINUS: + if (FLOAT_MODE_P (mode)) + { + *total = ix86_cost->fadd; + return false; + } + /* FALLTHRU */ + + case AND: + case IOR: + case XOR: + if (!TARGET_64BIT && mode == DImode) + { + *total = (ix86_cost->add * 2 + + (rtx_cost (XEXP (x, 0), outer_code) + << (GET_MODE (XEXP (x, 0)) != DImode)) + + (rtx_cost (XEXP (x, 1), outer_code) + << (GET_MODE (XEXP (x, 1)) != DImode))); + return true; + } + /* FALLTHRU */ + + case NEG: + if (FLOAT_MODE_P (mode)) + { + *total = ix86_cost->fchs; + return false; + } + /* FALLTHRU */ + + case NOT: + if (!TARGET_64BIT && mode == DImode) + *total = ix86_cost->add * 2; + else + *total = ix86_cost->add; + return false; + + case COMPARE: + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT + && XEXP (XEXP (x, 0), 1) == const1_rtx + && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT + && XEXP (x, 1) == const0_rtx) + { + /* This kind of construct is implemented using test[bwl]. + Treat it as if we had an AND.
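+ For example, + + (compare (zero_extract:SI (reg:SI x) (const_int 1) (const_int 3)) + (const_int 0)) + + tests a single bit and is emitted as "testb $8, ..." (the mask + being 1 << 3), so we charge one add-class insn plus the operand + costs, exactly as for an AND with a constant. (The operand shown + is illustrative.)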
*/ + *total = (ix86_cost->add + + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) + + rtx_cost (const1_rtx, outer_code)); + return true; + } + return false; + + case FLOAT_EXTEND: + if (!TARGET_SSE_MATH + || mode == XFmode + || (mode == DFmode && !TARGET_SSE2)) + /* For standard 80387 constants, raise the cost to prevent + compress_float_constant() to generate load from memory. */ + switch (standard_80387_constant_p (XEXP (x, 0))) + { + case -1: + case 0: + *total = 0; + break; + case 1: /* 0.0 */ + *total = 1; + break; + default: + *total = (x86_ext_80387_constants & TUNEMASK + || optimize_size + ? 1 : 0); + } + return false; + + case ABS: + if (FLOAT_MODE_P (mode)) + *total = ix86_cost->fabs; + return false; + + case SQRT: + if (FLOAT_MODE_P (mode)) + *total = ix86_cost->fsqrt; + return false; + + case UNSPEC: + if (XINT (x, 1) == UNSPEC_TP) + *total = 0; + return false; + + default: + return false; + } +} + +#if TARGET_MACHO + +static int current_machopic_label_num; + +/* Given a symbol name and its associated stub, write out the + definition of the stub. */ + +void +machopic_output_stub (FILE *file, const char *symb, const char *stub) +{ + unsigned int length; + char *binder_name, *symbol_name, lazy_ptr_name[32]; + int label = ++current_machopic_label_num; + + /* For 64-bit we shouldn't get here. */ + gcc_assert (!TARGET_64BIT); + + /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ + symb = (*targetm.strip_name_encoding) (symb); + + length = strlen (stub); + binder_name = alloca (length + 32); + GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); + + length = strlen (symb); + symbol_name = alloca (length + 32); + GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); + + sprintf (lazy_ptr_name, "L%d$lz", label); + + /* APPLE LOCAL begin deep branch prediction pic-base */ + /* APPLE LOCAL begin AT&T-style stub 4164563 */ + /* Choose one of four possible sections for this stub. */ + if (MACHOPIC_ATT_STUB) + switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); + else if (MACHOPIC_PURE) + /* APPLE LOCAL end AT&T-style stub 4164563 */ + { + if (TARGET_DEEP_BRANCH_PREDICTION) + switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); + else + switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); + } + else + /* APPLE LOCAL end deep branch prediction pic-base */ + switch_to_section (darwin_sections[machopic_symbol_stub_section]); + + fprintf (file, "%s:\n", stub); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + + /* APPLE LOCAL begin use %ecx in stubs 4146993 */ + /* APPLE LOCAL begin deep branch prediction pic-base */ + /* APPLE LOCAL begin AT&T-style stub 4164563 */ + if (MACHOPIC_ATT_STUB) + { + fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); + } + else if (MACHOPIC_PURE) + /* APPLE LOCAL end AT&T-style stub 4164563 */ + { + /* PIC stub. */ + if (TARGET_DEEP_BRANCH_PREDICTION) + { + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); + output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */ + fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label); + } + else + { + /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */ + fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label); + fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label); + } + fprintf (file, "\tjmp\t*%%ecx\n"); + } + else /* 16-byte -mdynamic-no-pic stub. 
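+ (This is a single indirect "jmp *Lnn$lz" through the lazy + symbol pointer; no PIC base register is required.)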
*/ + fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); + + /* APPLE LOCAL begin AT&T-style stub 4164563 */ + /* The AT&T-style ("self-modifying") stub is not lazily bound, thus + it needs no stub-binding-helper. */ + if (MACHOPIC_ATT_STUB) + return; + /* APPLE LOCAL end AT&T-style stub 4164563 */ + + /* The "stub_binding_helper" is a fragment that gets executed only + once, the first time this stub is invoked (then it becomes "dead + code"). It asks the dynamic linker to set the + lazy_symbol_pointer to point at the function we want + (e.g. printf) so that subsequent invocations of this stub go + directly to that dynamically-linked callee. Other UN*X systems + use similar stubs, but those are generated by the static linker + and never appear in assembly files. */ + /* APPLE LOCAL end deep branch prediction pic-base */ + fprintf (file, "%s:\n", binder_name); + + /* APPLE LOCAL begin deep branch prediction pic-base * tabify insns */ + if (MACHOPIC_PURE) + { + fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); + fprintf (file, "\tpushl\t%%ecx\n"); + } + else + fprintf (file, "\t pushl\t$%s\n", lazy_ptr_name); + + fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); + /* APPLE LOCAL end deep branch prediction pic-base * tabify insns */ + /* APPLE LOCAL end use %ecx in stubs 4146993 */ + + /* APPLE LOCAL begin deep branch prediction pic-base. */ + /* N.B. Keep the correspondence of these + 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the + old-pic/new-pic/non-pic stubs; altering this will break + compatibility with existing dylibs. */ + if (MACHOPIC_PURE) + { + /* PIC stubs. */ + if (TARGET_DEEP_BRANCH_PREDICTION) + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); + else + /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */ + switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); + } + else + /* 16-byte -mdynamic-no-pic stub. */ + switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); + + fprintf (file, "%s:\n", lazy_ptr_name); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + fprintf (file, "\t.long\t%s\n", binder_name); +} +/* APPLE LOCAL end deep branch prediction pic-base */ + +void +darwin_x86_file_end (void) +{ + darwin_file_end (); + ix86_file_end (); +} + +/* APPLE LOCAL begin 4457939 stack alignment mishandled */ +void +ix86_darwin_init_expanders (void) +{ + /* <rdar://problem/4471596> stack alignment is not handled properly + + Please remove this entire function when addressing this + Radar. Please be sure to delete the definition of INIT_EXPANDERS + in i386/darwin.h as well. */ + /* Darwin/x86_32 stack pointer will be 16-byte aligned at every + CALL, but the frame pointer, when used, will be 8-bytes offset + from a 16-byte alignment (the size of the return address and the + saved frame pointer). */ + if (cfun && cfun->emit + && cfun->emit->regno_pointer_align) + { + REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = STACK_BOUNDARY; + REGNO_POINTER_ALIGN (FRAME_POINTER_REGNUM) = BITS_PER_WORD; + REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_WORD; + REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = BITS_PER_WORD; + } +} +/* APPLE LOCAL end 4457939 stack alignment mishandled */ +#endif /* TARGET_MACHO */ + +/* Order the registers for register allocator. */ + +void +x86_order_regs_for_local_alloc (void) +{ + int pos = 0; + int i; + + /* First allocate the local general purpose registers. 
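+ ("Local" here means call-clobbered; on IA-32 this prefers + registers such as %eax, %edx and %ecx, which need no save and + restore in the prologue and epilogue.)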
*/ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (GENERAL_REGNO_P (i) && call_used_regs[i]) + reg_alloc_order [pos++] = i; + + /* Global general purpose registers. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (GENERAL_REGNO_P (i) && !call_used_regs[i]) + reg_alloc_order [pos++] = i; + + /* x87 registers come first in case we are doing FP math + using them. */ + if (!TARGET_SSE_MATH) + for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) + reg_alloc_order [pos++] = i; + + /* SSE registers. */ + for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) + reg_alloc_order [pos++] = i; + for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) + reg_alloc_order [pos++] = i; + + /* x87 registers. */ + if (TARGET_SSE_MATH) + for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) + reg_alloc_order [pos++] = i; + + for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) + reg_alloc_order [pos++] = i; + + /* Initialize the rest of array as we do not allocate some registers + at all. */ + while (pos < FIRST_PSEUDO_REGISTER) + reg_alloc_order [pos++] = 0; +} + +/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +ix86_handle_struct_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + tree *type = NULL; + if (DECL_P (*node)) + { + if (TREE_CODE (*node) == TYPE_DECL) + type = &TREE_TYPE (*node); + } + else + type = node; + + if (!(type && (TREE_CODE (*type) == RECORD_TYPE + || TREE_CODE (*type) == UNION_TYPE))) + { + warning (OPT_Wattributes, "%qs attribute ignored", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + + else if ((is_attribute_p ("ms_struct", name) + && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) + || ((is_attribute_p ("gcc_struct", name) + && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) + { + warning (OPT_Wattributes, "%qs incompatible attribute ignored", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +static bool +ix86_ms_bitfield_layout_p (tree record_type) +{ + return (TARGET_MS_BITFIELD_LAYOUT && + !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) + || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); +} + +/* Returns an expression indicating where the this parameter is + located on entry to the FUNCTION. */ + +static rtx +x86_this_parameter (tree function) +{ + tree type = TREE_TYPE (function); + + if (TARGET_64BIT) + { + int n = aggregate_value_p (TREE_TYPE (type), type) != 0; + return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); + } + + if (ix86_function_regparm (type, function) > 0) + { + tree parm; + + parm = TYPE_ARG_TYPES (type); + /* Figure out whether or not the function has a variable number of + arguments. */ + for (; parm; parm = TREE_CHAIN (parm)) + if (TREE_VALUE (parm) == void_type_node) + break; + /* If not, the this parameter is in the first argument. */ + if (parm) + { + int regno = 0; + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) + regno = 2; + return gen_rtx_REG (SImode, regno); + } + } + + if (aggregate_value_p (TREE_TYPE (type), type)) + return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); + else + return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); +} + +/* Determine whether x86_output_mi_thunk can succeed. 
*/ + +static bool +x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta ATTRIBUTE_UNUSED, + HOST_WIDE_INT vcall_offset, tree function) +{ + /* 64-bit can handle anything. */ + if (TARGET_64BIT) + return true; + + /* For 32-bit, everything's fine if we have one free register. */ + if (ix86_function_regparm (TREE_TYPE (function), function) < 3) + return true; + + /* Need a free register for vcall_offset. */ + if (vcall_offset) + return false; + + /* Need a free register for GOT references. */ + if (flag_pic && !(*targetm.binds_local_p) (function)) + return false; + + /* Otherwise ok. */ + return true; +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, + tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx xops[3]; + rtx this = x86_this_parameter (function); + rtx this_reg, tmp; + + /* If VCALL_OFFSET, we'll need THIS in a register. Might as well + pull it in now and let DELTA benefit. */ + if (REG_P (this)) + this_reg = this; + else if (vcall_offset) + { + /* Put the this parameter into %eax. */ + xops[0] = this; + xops[1] = this_reg = gen_rtx_REG (Pmode, 0); + output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); + } + else + this_reg = NULL_RTX; + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[0] = GEN_INT (delta); + xops[1] = this_reg ? this_reg : this; + if (TARGET_64BIT) + { + if (!x86_64_general_operand (xops[0], DImode)) + { + tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); + xops[1] = tmp; + output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); + xops[0] = tmp; + xops[1] = this; + } + output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); + } + else + output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + if (TARGET_64BIT) + tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); + else + { + int tmp_regno = 2 /* ECX */; + if (lookup_attribute ("fastcall", + TYPE_ATTRIBUTES (TREE_TYPE (function)))) + tmp_regno = 0 /* EAX */; + tmp = gen_rtx_REG (SImode, tmp_regno); + } + + xops[0] = gen_rtx_MEM (Pmode, this_reg); + xops[1] = tmp; + if (TARGET_64BIT) + output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); + else + output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); + + /* Adjust the this parameter. */ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); + if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) + { + rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); + xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); + } + xops[1] = this_reg; + if (TARGET_64BIT) + output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); + else + output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); + } + + /* If necessary, drop THIS back to its stack slot. 
*/ + if (this_reg && this_reg != this) + { + xops[0] = this_reg; + xops[1] = this; + output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); + } + + xops[0] = XEXP (DECL_RTL (function), 0); + if (TARGET_64BIT) + { + if (!flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jmp\t%P0", xops); + else + { + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); + tmp = gen_rtx_CONST (Pmode, tmp); + tmp = gen_rtx_MEM (QImode, tmp); + xops[0] = tmp; + output_asm_insn ("jmp\t%A0", xops); + } + } + else + { + if (!flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jmp\t%P0", xops); + else +#if TARGET_MACHO + if (TARGET_MACHO) + { + rtx sym_ref = XEXP (DECL_RTL (function), 0); + /* APPLE LOCAL begin axe stubs 5571540 */ + if (darwin_stubs) + sym_ref = (gen_rtx_SYMBOL_REF + (Pmode, + machopic_indirection_name (sym_ref, /*stub_p=*/true))); + tmp = gen_rtx_MEM (QImode, sym_ref); + /* APPLE LOCAL end axe stubs 5571540 */ + xops[0] = tmp; + output_asm_insn ("jmp\t%0", xops); + } + else +#endif /* TARGET_MACHO */ + { + tmp = gen_rtx_REG (SImode, 2 /* ECX */); + output_set_got (tmp, NULL_RTX); + + xops[1] = tmp; + output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); + output_asm_insn ("jmp\t{*}%1", xops); + } + } +} + +static void +x86_file_start (void) +{ + default_file_start (); +#if TARGET_MACHO + darwin_file_start (); +#endif + if (X86_FILE_START_VERSION_DIRECTIVE) + fputs ("\t.version\t\"01.01\"\n", asm_out_file); + if (X86_FILE_START_FLTUSED) + fputs ("\t.global\t__fltused\n", asm_out_file); + if (ix86_asm_dialect == ASM_INTEL) + fputs ("\t.intel_syntax\n", asm_out_file); +} + +int +x86_field_alignment (tree field, int computed) +{ + enum machine_mode mode; + tree type = TREE_TYPE (field); + + if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) + return computed; + /* APPLE LOCAL begin mac68k alignment */ +#if TARGET_MACHO + if (OPTION_ALIGN_MAC68K) + { + if (computed >= 128) + return computed; + return MIN (computed, 16); + } +#endif + /* APPLE LOCAL end mac68k alignment */ + mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE + ? get_inner_array_type (type) : type); + if (mode == DFmode || mode == DCmode + || GET_MODE_CLASS (mode) == MODE_INT + || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) + return MIN (32, computed); + return computed; +} + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ +void +x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) +{ + if (TARGET_64BIT) + if (flag_pic) + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); +#endif + fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); + } + else + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); +#endif + fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); + } + else if (flag_pic) + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", + LPREFIX, labelno, PROFILE_COUNT_REGISTER); +#endif + fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); + } + else + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, + PROFILE_COUNT_REGISTER); +#endif + fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); + } +} + +/* We don't have exact information about the insn sizes, but we may assume + quite safely that we are informed about all 1 byte insns and memory + address sizes. This is enough to eliminate unnecessary padding in + 99% of cases. 
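+ + For example, a direct "call _foo" is always counted as 5 bytes + (opcode plus rel32), and any other non-jump insn whose pattern + mentions a symbolic reference is assumed to need at least a + 4-byte displacement, i.e. at least 5 bytes in total (jumps are + excluded because their references are PC-relative).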
*/ + +static int +min_insn_size (rtx insn) +{ + int l = 0; + + if (!INSN_P (insn) || !active_insn_p (insn)) + return 0; + + /* Discard alignments we've emit and jump instructions. */ + if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) + return 0; + if (GET_CODE (insn) == JUMP_INSN + && (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) + return 0; + + /* Important case - calls are always 5 bytes. + It is common to have many calls in the row. */ + if (GET_CODE (insn) == CALL_INSN + && symbolic_reference_mentioned_p (PATTERN (insn)) + && !SIBLING_CALL_P (insn)) + return 5; + if (get_attr_length (insn) <= 1) + return 1; + + /* For normal instructions we may rely on the sizes of addresses + and the presence of symbol to require 4 bytes of encoding. + This is not the case for jumps where references are PC relative. */ + if (GET_CODE (insn) != JUMP_INSN) + { + l = get_attr_length_address (insn); + if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) + l = 4; + } + if (l) + return 1+l; + else + return 2; +} + +/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte + window. */ + +static void +ix86_avoid_jump_misspredicts (void) +{ + rtx insn, start = get_insns (); + int nbytes = 0, njumps = 0; + int isjump = 0; + + /* Look for all minimal intervals of instructions containing 4 jumps. + The intervals are bounded by START and INSN. NBYTES is the total + size of instructions in the interval including INSN and not including + START. When the NBYTES is smaller than 16 bytes, it is possible + that the end of START and INSN ends up in the same 16byte page. + + The smallest offset in the page INSN can start is the case where START + ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). + We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). + */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + + nbytes += min_insn_size (insn); + if (dump_file) + fprintf(dump_file, "Insn %i estimated to %i bytes\n", + INSN_UID (insn), min_insn_size (insn)); + if ((GET_CODE (insn) == JUMP_INSN + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) + || GET_CODE (insn) == CALL_INSN) + njumps++; + else + continue; + + while (njumps > 3) + { + start = NEXT_INSN (start); + if ((GET_CODE (start) == JUMP_INSN + && GET_CODE (PATTERN (start)) != ADDR_VEC + && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) + || GET_CODE (start) == CALL_INSN) + njumps--, isjump = 1; + else + isjump = 0; + nbytes -= min_insn_size (start); + } + gcc_assert (njumps >= 0); + if (dump_file) + fprintf (dump_file, "Interval %i to %i has %i bytes\n", + INSN_UID (start), INSN_UID (insn), nbytes); + + if (njumps == 3 && isjump && nbytes < 16) + { + int padsize = 15 - nbytes + min_insn_size (insn); + + if (dump_file) + fprintf (dump_file, "Padding insn %i by %i bytes!\n", + INSN_UID (insn), padsize); + emit_insn_before (gen_align (GEN_INT (padsize)), insn); + } + } +} + +/* AMD Athlon works faster + when RET is not destination of conditional jump or directly preceded + by other jump instruction. We avoid the penalty by inserting NOP just + before the RET instructions in such cases. 
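+ (The inserted "NOP" is in fact the replacement of the plain RET + by return_internal_long, i.e. the two-byte "rep ; ret" encoding; + a REP prefix before RET is architecturally ignored, so the + semantics are unchanged.)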
*/ +static void +ix86_pad_returns (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + { + basic_block bb = e->src; + rtx ret = BB_END (bb); + rtx prev; + bool replace = false; + + if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN + || !maybe_hot_bb_p (bb)) + continue; + for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) + if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) + break; + if (prev && GET_CODE (prev) == CODE_LABEL) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (EDGE_FREQUENCY (e) && e->src->index >= 0 + && !(e->flags & EDGE_FALLTHRU)) + replace = true; + } + if (!replace) + { + prev = prev_active_insn (ret); + if (prev + && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) + || GET_CODE (prev) == CALL_INSN)) + replace = true; + /* Empty functions get branch mispredict even when the jump destination + is not visible to us. */ + if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) + replace = true; + } + if (replace) + { + emit_insn_before (gen_return_internal_long (), ret); + delete_insn (ret); + } + } +} + +/* Implement machine specific optimizations. We implement padding of returns + for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ +static void +ix86_reorg (void) +{ + if (TARGET_PAD_RETURNS && optimize && !optimize_size) + ix86_pad_returns (); + if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size) + ix86_avoid_jump_misspredicts (); +} + +/* Return nonzero when QImode register that must be represented via REX prefix + is used. */ +bool +x86_extended_QIreg_mentioned_p (rtx insn) +{ + int i; + extract_insn_cached (insn); + for (i = 0; i < recog_data.n_operands; i++) + if (REG_P (recog_data.operand[i]) + && REGNO (recog_data.operand[i]) >= 4) + return true; + return false; +} + +/* Return nonzero when P points to register encoded via REX prefix. + Called via for_each_rtx. */ +static int +extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) +{ + unsigned int regno; + if (!REG_P (*p)) + return 0; + regno = REGNO (*p); + return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); +} + +/* Return true when INSN mentions register that must be encoded using REX + prefix. */ +bool +x86_extended_reg_mentioned_p (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL); +} + +/* Generate an unsigned DImode/SImode to FP conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. 
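+ + In outline, the expansion emitted below is + + if ((signed) x >= 0) + result = (FLOAT) x; + else + { + t = (x >> 1) | (x & 1); + result = (FLOAT) t; + result = result + result; + } + + where ORing the shifted-out low bit back in keeps the final + rounding correct after the doubling.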
*/ + +void +x86_emit_floatuns (rtx operands[2]) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + enum machine_mode mode, inmode; + + inmode = GET_MODE (operands[1]); + /* APPLE LOCAL begin 4176531 4424891 */ + mode = GET_MODE (operands[0]); + if (!TARGET_64BIT && mode == DFmode && !optimize_size) + { + switch (inmode) + { + case SImode: + ix86_expand_convert_uns_SI2DF_sse (operands); + break; + case DImode: + ix86_expand_convert_uns_DI2DF_sse (operands); + break; + default: + abort (); + break; + } + return; + } + /* APPLE LOCAL end 4176531 4424891 */ + + out = operands[0]; + in = force_reg (inmode, operands[1]); + /* APPLE LOCAL begin one line deletion 4424891 */ + /* APPLE LOCAL end one line deletion 4424891 */ + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i1 = gen_reg_rtx (Pmode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab); + + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT); + i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT); + i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); + expand_float (f0, i0, 0); + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector + with all elements equal to VAR. Return true if successful. */ + +static bool +ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, + rtx target, rtx val) +{ + enum machine_mode smode, wsmode, wvmode; + rtx x; + + switch (mode) + { + case V2SImode: + case V2SFmode: + if (!mmx_ok) + return false; + /* FALLTHRU */ + + case V2DFmode: + case V2DImode: + case V4SFmode: + case V4SImode: + val = force_reg (GET_MODE_INNER (mode), val); + x = gen_rtx_VEC_DUPLICATE (mode, val); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + return true; + + case V4HImode: + if (!mmx_ok) + return false; + if (TARGET_SSE || TARGET_3DNOW_A) + { + val = gen_lowpart (SImode, val); + x = gen_rtx_TRUNCATE (HImode, val); + x = gen_rtx_VEC_DUPLICATE (mode, x); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + return true; + } + else + { + smode = HImode; + wsmode = SImode; + wvmode = V2SImode; + goto widen; + } + + case V8QImode: + if (!mmx_ok) + return false; + smode = QImode; + wsmode = HImode; + wvmode = V4HImode; + goto widen; + case V8HImode: + if (TARGET_SSE2) + { + rtx tmp1, tmp2; + /* Extend HImode to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + /* Insert the SImode value as low element of V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + tmp1 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); + /* Cast the V4SImode vector back to a V8HImode vector. */ + tmp1 = gen_reg_rtx (V8HImode); + emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); + /* Duplicate the low short through the whole low SImode word. */ + emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); + /* Cast the V8HImode vector back to a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); + /* Replicate the low element of the V4SImode vector. 
*/ + emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); + /* Cast the V2SImode back to V8HImode, and store in target. */ + emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); + return true; + } + smode = HImode; + wsmode = SImode; + wvmode = V4SImode; + goto widen; + case V16QImode: + if (TARGET_SSE2) + { + rtx tmp1, tmp2; + /* Extend QImode to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + /* Insert the SImode value as low element of V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + tmp1 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); + /* Cast the V4SImode vector back to a V16QImode vector. */ + tmp1 = gen_reg_rtx (V16QImode); + emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); + /* Duplicate the low byte through the whole low SImode word. */ + emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + /* Cast the V16QImode vector back to a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); + /* Replicate the low element of the V4SImode vector. */ + emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); + /* Cast the V2SImode back to V16QImode, and store in target. */ + emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); + return true; + } + smode = QImode; + wsmode = HImode; + wvmode = V8HImode; + goto widen; + widen: + /* Replicate the value once into the next wider mode and recurse. */ + val = convert_modes (wsmode, smode, val, true); + x = expand_simple_binop (wsmode, ASHIFT, val, + GEN_INT (GET_MODE_BITSIZE (smode)), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); + + x = gen_reg_rtx (wvmode); + if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) + gcc_unreachable (); + emit_move_insn (target, gen_lowpart (mode, x)); + return true; + + default: + return false; + } +} + +/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector + whose ONE_VAR element is VAR, and other elements are zero. Return true + if successful. */ + +static bool +ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, + rtx target, rtx var, int one_var) +{ + enum machine_mode vsimode; + rtx new_target; + rtx x, tmp; + + switch (mode) + { + case V2SFmode: + case V2SImode: + if (!mmx_ok) + return false; + /* FALLTHRU */ + + case V2DFmode: + case V2DImode: + if (one_var != 0) + return false; + var = force_reg (GET_MODE_INNER (mode), var); + x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + return true; + + case V4SFmode: + case V4SImode: + if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) + new_target = gen_reg_rtx (mode); + else + new_target = target; + var = force_reg (GET_MODE_INNER (mode), var); + x = gen_rtx_VEC_DUPLICATE (mode, var); + x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); + if (one_var != 0) + { + /* We need to shuffle the value to the correct position, so + create a new pseudo to store the intermediate result. */ + + /* With SSE2, we can use the integer shuffle insns. */ + if (mode != V4SFmode && TARGET_SSE2) + { + emit_insn (gen_sse2_pshufd_1 (new_target, new_target, + GEN_INT (1), + GEN_INT (one_var == 1 ? 
0 : 1), + GEN_INT (one_var == 2 ? 0 : 1), + GEN_INT (one_var == 3 ? 0 : 1))); + if (target != new_target) + emit_move_insn (target, new_target); + return true; + } + + /* Otherwise convert the intermediate result to V4SFmode and + use the SSE1 shuffle instructions. */ + if (mode != V4SFmode) + { + tmp = gen_reg_rtx (V4SFmode); + emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); + } + else + tmp = new_target; + + emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, + GEN_INT (1), + GEN_INT (one_var == 1 ? 0 : 1), + GEN_INT (one_var == 2 ? 0+4 : 1+4), + GEN_INT (one_var == 3 ? 0+4 : 1+4))); + + if (mode != V4SFmode) + emit_move_insn (target, gen_lowpart (V4SImode, tmp)); + else if (tmp != target) + emit_move_insn (target, tmp); + } + else if (target != new_target) + emit_move_insn (target, new_target); + return true; + + case V8HImode: + case V16QImode: + vsimode = V4SImode; + goto widen; + case V4HImode: + case V8QImode: + if (!mmx_ok) + return false; + vsimode = V2SImode; + goto widen; + widen: + if (one_var != 0) + return false; + + /* Zero extend the variable element to SImode and recurse. */ + var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); + + x = gen_reg_rtx (vsimode); + if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, + var, one_var)) + gcc_unreachable (); + + emit_move_insn (target, gen_lowpart (mode, x)); + return true; + + default: + return false; + } +} + +/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector + consisting of the values in VALS. It is known that all elements + except ONE_VAR are constants. Return true if successful. */ + +static bool +ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, + rtx target, rtx vals, int one_var) +{ + rtx var = XVECEXP (vals, 0, one_var); + enum machine_mode wmode; + rtx const_vec, x; + + const_vec = copy_rtx (vals); + XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); + + switch (mode) + { + case V2DFmode: + case V2DImode: + case V2SFmode: + case V2SImode: + /* For the two element vectors, it's just as easy to use + the general case. */ + return false; + + case V4SFmode: + case V4SImode: + case V8HImode: + case V4HImode: + break; + + case V16QImode: + wmode = V8HImode; + goto widen; + case V8QImode: + wmode = V4HImode; + goto widen; + widen: + /* There's no way to set one QImode entry easily. Combine + the variable value with its adjacent constant value, and + promote to an HImode set. */ + x = XVECEXP (vals, 0, one_var ^ 1); + if (one_var & 1) + { + var = convert_modes (HImode, QImode, var, true); + var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + x = GEN_INT (INTVAL (x) & 0xff); + } + else + { + var = convert_modes (HImode, QImode, var, true); + x = gen_int_mode (INTVAL (x) << 8, HImode); + } + if (x != const0_rtx) + var = expand_simple_binop (HImode, IOR, var, x, var, + 1, OPTAB_LIB_WIDEN); + + x = gen_reg_rtx (wmode); + emit_move_insn (x, gen_lowpart (wmode, const_vec)); + ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); + + emit_move_insn (target, gen_lowpart (mode, x)); + return true; + + default: + return false; + } + + emit_move_insn (target, const_vec); + ix86_expand_vector_set (mmx_ok, target, var, one_var); + return true; +} + +/* A subroutine of ix86_expand_vector_init. Handle the most general case: + all values variable, and none identical. 
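+ + The strategy used below: two-element vectors become a single + VEC_CONCAT; V4SF and V4SI are built recursively as a concat of + two two-element halves; narrower element types are packed into + integer words with shifts and ORs, e.g. for V8HImode on a 32-bit + target each word is assembled as + + word = (hi_elt << 16) | lo_elt; + + (names illustrative) and the words are then moved into the + vector register.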
*/ + +static void +ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, + rtx target, rtx vals) +{ + enum machine_mode half_mode = GET_MODE_INNER (mode); + rtx op0 = NULL, op1 = NULL; + bool use_vec_concat = false; + + switch (mode) + { + case V2SFmode: + case V2SImode: + if (!mmx_ok && !TARGET_SSE) + break; + /* FALLTHRU */ + + case V2DFmode: + case V2DImode: + /* For the two element vectors, we always implement VEC_CONCAT. */ + op0 = XVECEXP (vals, 0, 0); + op1 = XVECEXP (vals, 0, 1); + use_vec_concat = true; + break; + + case V4SFmode: + half_mode = V2SFmode; + goto half; + case V4SImode: + half_mode = V2SImode; + goto half; + half: + { + rtvec v; + + /* For V4SF and V4SI, we implement a concat of two V2 vectors. + Recurse to load the two halves. */ + + op0 = gen_reg_rtx (half_mode); + v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1)); + ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v)); + + op1 = gen_reg_rtx (half_mode); + v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3)); + ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v)); + + use_vec_concat = true; + } + break; + + case V8HImode: + case V16QImode: + case V4HImode: + case V8QImode: + break; + + default: + gcc_unreachable (); + } + + if (use_vec_concat) + { + if (!register_operand (op0, half_mode)) + op0 = force_reg (half_mode, op0); + if (!register_operand (op1, half_mode)) + op1 = force_reg (half_mode, op1); + + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_VEC_CONCAT (mode, op0, op1))); + } + else + { + int i, j, n_elts, n_words, n_elt_per_word; + enum machine_mode inner_mode; + rtx words[4], shift; + + inner_mode = GET_MODE_INNER (mode); + n_elts = GET_MODE_NUNITS (mode); + n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; + n_elt_per_word = n_elts / n_words; + shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); + + for (i = 0; i < n_words; ++i) + { + rtx word = NULL_RTX; + + for (j = 0; j < n_elt_per_word; ++j) + { + rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); + elt = convert_modes (word_mode, inner_mode, elt, true); + + if (j == 0) + word = elt; + else + { + word = expand_simple_binop (word_mode, ASHIFT, word, shift, + word, 1, OPTAB_LIB_WIDEN); + word = expand_simple_binop (word_mode, IOR, word, elt, + word, 1, OPTAB_LIB_WIDEN); + } + } + + words[i] = word; + } + + if (n_words == 1) + emit_move_insn (target, gen_lowpart (mode, words[0])); + else if (n_words == 2) + { + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp)); + emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); + emit_move_insn (gen_highpart (word_mode, tmp), words[1]); + emit_move_insn (target, tmp); + } + else if (n_words == 4) + { + rtx tmp = gen_reg_rtx (V4SImode); + vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); + ix86_expand_vector_init_general (false, V4SImode, tmp, vals); + emit_move_insn (target, gen_lowpart (mode, tmp)); + } + else + gcc_unreachable (); + } +} + +/* Initialize vector TARGET via VALS. Suppress the use of MMX + instructions unless MMX_OK is true. 
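+ + The cases below are tried in order of preference: an all-constant + vector is loaded from the constant pool; { x, x, x, x } is + broadcast; { x, 0, 0, 0 } takes the one-nonzero path; something + like { 1, 2, 3, x } is loaded from the pool and the variable + element is then patched in with ix86_expand_vector_set; anything + else falls through to the fully general routine.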
*/ + +void +ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true, all_const_zero = true; + int i; + rtx x; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONSTANT_P (x)) + n_var++, one_var = i; + else if (x != CONST0_RTX (inner_mode)) + all_const_zero = false; + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + /* Constants are best loaded from the constant pool. */ + if (n_var == 0) + { + emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); + return; + } + + /* If all values are identical, broadcast the value. */ + if (all_same + && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, + XVECEXP (vals, 0, 0))) + return; + + /* Values where only one field is non-constant are best loaded from + the pool and overwritten via move later. */ + if (n_var == 1) + { + if (all_const_zero + && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, + XVECEXP (vals, 0, one_var), + one_var)) + return; + + if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) + return; + } + + ix86_expand_vector_init_general (mmx_ok, mode, target, vals); +} + +void +ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + bool use_vec_merge = false; + rtx tmp; + + switch (mode) + { + case V2SFmode: + case V2SImode: + if (mmx_ok) + { + tmp = gen_reg_rtx (GET_MODE_INNER (mode)); + ix86_expand_vector_extract (true, tmp, target, 1 - elt); + if (elt == 0) + tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); + else + tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + return; + } + break; + + /* APPLE LOCAL begin 5612787 mainline sse4 */ + case V2DImode: + use_vec_merge = TARGET_SSE4_1; + if (use_vec_merge) + break; + + case V2DFmode: + /* APPLE LOCAL end 5612787 mainline sse4 */ + { + rtx op0, op1; + + /* For the two element vectors, we implement a VEC_CONCAT with + the extraction of the other element. 
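+ E.g. writing element 1 of the two-element vector { a, b } + produces (vec_concat (vec_select target [0]) val).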
*/ + + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); + tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); + + if (elt == 0) + op0 = val, op1 = tmp; + else + op0 = tmp, op1 = val; + + tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + return; + + case V4SFmode: + /* APPLE LOCAL begin 5612787 mainline sse4 */ + use_vec_merge = TARGET_SSE4_1; + if (use_vec_merge) + break; + /* APPLE LOCAL end 5612787 mainline sse4 */ + switch (elt) + { + case 0: + use_vec_merge = true; + break; + + case 1: + /* tmp = target = A B C D */ + tmp = copy_to_reg (target); + /* target = A A B B */ + emit_insn (gen_sse_unpcklps (target, target, target)); + /* target = X A B B */ + ix86_expand_vector_set (false, target, val, 0); + /* target = A X C D */ + emit_insn (gen_sse_shufps_1 (target, target, tmp, + GEN_INT (1), GEN_INT (0), + GEN_INT (2+4), GEN_INT (3+4))); + return; + + case 2: + /* tmp = target = A B C D */ + tmp = copy_to_reg (target); + /* tmp = X B C D */ + ix86_expand_vector_set (false, tmp, val, 0); + /* target = A B X D */ + emit_insn (gen_sse_shufps_1 (target, target, tmp, + GEN_INT (0), GEN_INT (1), + GEN_INT (0+4), GEN_INT (3+4))); + return; + + case 3: + /* tmp = target = A B C D */ + tmp = copy_to_reg (target); + /* tmp = X B C D */ + ix86_expand_vector_set (false, tmp, val, 0); + /* target = A B X D */ + emit_insn (gen_sse_shufps_1 (target, target, tmp, + GEN_INT (0), GEN_INT (1), + GEN_INT (2+4), GEN_INT (0+4))); + return; + + default: + gcc_unreachable (); + } + break; + + case V4SImode: + /* APPLE LOCAL begin 5612787 mainline sse4 */ + use_vec_merge = TARGET_SSE4_1; + if (use_vec_merge) + break; + /* APPLE LOCAL end 5612787 mainline sse4 */ + /* Element 0 handled by vec_merge below. */ + if (elt == 0) + { + use_vec_merge = true; + break; + } + + if (TARGET_SSE2) + { + /* With SSE2, use integer shuffles to swap element 0 and ELT, + store into element 0, then shuffle them back. */ + + rtx order[4]; + + order[0] = GEN_INT (elt); + order[1] = const1_rtx; + order[2] = const2_rtx; + order[3] = GEN_INT (3); + order[elt] = const0_rtx; + + emit_insn (gen_sse2_pshufd_1 (target, target, order[0], + order[1], order[2], order[3])); + + ix86_expand_vector_set (false, target, val, 0); + + emit_insn (gen_sse2_pshufd_1 (target, target, order[0], + order[1], order[2], order[3])); + } + else + { + /* For SSE1, we have to reuse the V4SF code. 
*/ + ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), + gen_lowpart (SFmode, val), elt); + } + return; + + case V8HImode: + use_vec_merge = TARGET_SSE2; + break; + case V4HImode: + use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); + break; + + case V16QImode: + /* APPLE LOCAL begin 5612787 mainline sse4 */ + use_vec_merge = TARGET_SSE4_1; + break; + /* APPLE LOCAL end 5612787 mainline sse4 */ + case V8QImode: + default: + break; + } + + if (use_vec_merge) + { + tmp = gen_rtx_VEC_DUPLICATE (mode, val); + tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else + { + rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); + + emit_move_insn (mem, target); + + tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); + emit_move_insn (tmp, val); + + emit_move_insn (target, mem); + } +} + +void +ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) +{ + enum machine_mode mode = GET_MODE (vec); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + bool use_vec_extr = false; + rtx tmp; + + switch (mode) + { + case V2SImode: + case V2SFmode: + if (!mmx_ok) + break; + /* FALLTHRU */ + + case V2DFmode: + case V2DImode: + use_vec_extr = true; + break; + + case V4SFmode: + /* APPLE LOCAL begin 5612787 mainline sse4 */ + use_vec_extr = TARGET_SSE4_1; + if (use_vec_extr) + break; + /* APPLE LOCAL end 5612787 mainline sse4 */ + switch (elt) + { + case 0: + tmp = vec; + break; + + case 1: + case 3: + tmp = gen_reg_rtx (mode); + emit_insn (gen_sse_shufps_1 (tmp, vec, vec, + GEN_INT (elt), GEN_INT (elt), + GEN_INT (elt+4), GEN_INT (elt+4))); + break; + + case 2: + tmp = gen_reg_rtx (mode); + emit_insn (gen_sse_unpckhps (tmp, vec, vec)); + break; + + default: + gcc_unreachable (); + } + vec = tmp; + use_vec_extr = true; + elt = 0; + break; + + case V4SImode: + /* APPLE LOCAL begin 5612787 mainline sse4 */ + use_vec_extr = TARGET_SSE4_1; + if (use_vec_extr) + break; + /* APPLE LOCAL end 5612787 mainline sse4 */ + if (TARGET_SSE2) + { + switch (elt) + { + case 0: + tmp = vec; + break; + + case 1: + case 3: + tmp = gen_reg_rtx (mode); + emit_insn (gen_sse2_pshufd_1 (tmp, vec, + GEN_INT (elt), GEN_INT (elt), + GEN_INT (elt), GEN_INT (elt))); + break; + + case 2: + tmp = gen_reg_rtx (mode); + emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); + break; + + default: + gcc_unreachable (); + } + vec = tmp; + use_vec_extr = true; + elt = 0; + } + else + { + /* For SSE1, we have to reuse the V4SF code. */ + ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), + gen_lowpart (V4SFmode, vec), elt); + return; + } + break; + + case V8HImode: + use_vec_extr = TARGET_SSE2; + break; + case V4HImode: + use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); + break; + + case V16QImode: + /* APPLE LOCAL begin 5612787 mainline sse4 */ + use_vec_extr = TARGET_SSE4_1; + break; + /* APPLE LOCAL end 5612787 mainline sse4 */ + case V8QImode: + /* ??? Could extract the appropriate HImode element and shift. */ + default: + break; + } + + if (use_vec_extr) + { + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); + tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); + + /* Let the rtl optimizers know about the zero extension performed. 
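+ A QImode or HImode element is therefore produced as + (zero_extend:SI (vec_select ...)) written into the SImode lowpart + of TARGET, rather than as a narrow partial-register write.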
*/ + /* APPLE LOCAL 5612787 mainline sse4 */ + if (inner_mode == QImode || inner_mode == HImode) + { + tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); + target = gen_lowpart (SImode, target); + } + + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else + { + rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); + + emit_move_insn (mem, vec); + + tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); + emit_move_insn (target, tmp); + } +} + +/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary + pattern to reduce; DEST is the destination; IN is the input vector. */ + +void +ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) +{ + rtx tmp1, tmp2, tmp3; + + tmp1 = gen_reg_rtx (V4SFmode); + tmp2 = gen_reg_rtx (V4SFmode); + tmp3 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_sse_movhlps (tmp1, in, in)); + emit_insn (fn (tmp2, tmp1, in)); + + emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2, + GEN_INT (1), GEN_INT (1), + GEN_INT (1+4), GEN_INT (1+4))); + emit_insn (fn (dest, tmp2, tmp3)); +} + +/* Target hook for scalar_mode_supported_p. */ +static bool +ix86_scalar_mode_supported_p (enum machine_mode mode) +{ + if (DECIMAL_FLOAT_MODE_P (mode)) + return true; + else + return default_scalar_mode_supported_p (mode); +} + +/* Implements target hook vector_mode_supported_p. */ +static bool +ix86_vector_mode_supported_p (enum machine_mode mode) +{ + if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) + return true; + if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) + return true; + if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) + return true; + if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) + return true; + return false; +} + +/* Worker function for TARGET_MD_ASM_CLOBBERS. + + We do this in the new i386 backend to maintain source compatibility + with the old cc0-based compiler. */ + +static tree +ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, + tree inputs ATTRIBUTE_UNUSED, + tree clobbers) +{ + clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), + clobbers); + clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"), + clobbers); + clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"), + clobbers); + return clobbers; +} + +/* Return true if this goes in small data/bss. */ + +static bool +ix86_in_large_data_p (tree exp) +{ + if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) + return false; + + /* Functions are never large data. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + if (strcmp (section, ".ldata") == 0 + || strcmp (section, ".lbss") == 0) + return true; + return false; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in data because it might be too big when completed. */ + if (!size || size > ix86_section_threshold) + return true; + } + + return false; +} +static void +ix86_encode_section_info (tree decl, rtx rtl, int first) +{ + default_encode_section_info (decl, rtl, first); + + if (TREE_CODE (decl) == VAR_DECL + && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) + && ix86_in_large_data_p (decl)) + SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; +} + +/* Worker function for REVERSE_CONDITION. */ + +enum rtx_code +ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) +{ + return (mode != CCFPmode && mode != CCFPUmode + ? 
reverse_condition (code) + : reverse_condition_maybe_unordered (code)); +} + +/* Output code to perform an x87 FP register move, from OPERANDS[1] + to OPERANDS[0]. */ + +const char * +output_387_reg_move (rtx insn, rtx *operands) +{ + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + { + if (REGNO (operands[0]) == FIRST_STACK_REG) + return output_387_ffreep (operands, 0); + return "fstp\t%y0"; + } + if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + return "fst\t%y0"; +} + +/* Output code to perform a conditional jump to LABEL, if C2 flag in + FP status register is set. */ + +void +ix86_emit_fp_unordered_jump (rtx label) +{ + rtx reg = gen_reg_rtx (HImode); + rtx temp; + + emit_insn (gen_x86_fnstsw_1 (reg)); + + if (TARGET_USE_SAHF) + { + emit_insn (gen_x86_sahf_1 (reg)); + + temp = gen_rtx_REG (CCmode, FLAGS_REG); + temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); + + temp = gen_rtx_REG (CCNOmode, FLAGS_REG); + temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); + } + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); + emit_jump_insn (temp); +} + +/* Output code to perform a log1p XFmode calculation. */ + +void ix86_emit_i387_log1p (rtx op0, rtx op1) +{ + rtx label1 = gen_label_rtx (); + rtx label2 = gen_label_rtx (); + + rtx tmp = gen_reg_rtx (XFmode); + rtx tmp2 = gen_reg_rtx (XFmode); + + emit_insn (gen_absxf2 (tmp, op1)); + emit_insn (gen_cmpxf (tmp, + CONST_DOUBLE_FROM_REAL_VALUE ( + REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), + XFmode))); + emit_jump_insn (gen_bge (label1)); + + emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ + emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1)); + emit_jump (label2); + + emit_label (label1); + emit_move_insn (tmp, CONST1_RTX (XFmode)); + emit_insn (gen_addxf3 (tmp, op1, tmp)); + emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ + emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp)); + + emit_label (label2); +} + +/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ + +static void +i386_solaris_elf_named_section (const char *name, unsigned int flags, + tree decl) +{ + /* With Binutils 2.15, the "@unwind" marker must be specified on + every occurrence of the ".eh_frame" section, not just the first + one. */ + if (TARGET_64BIT + && strcmp (name, ".eh_frame") == 0) + { + fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, + flags & SECTION_WRITE ? "aw" : "a"); + return; + } + default_elf_asm_named_section (name, flags, decl); +} + +/* APPLE LOCAL begin regparmandstackparm */ + +/* Mark this fndecl as using the regparmandstackparm calling convention. */ +static void +ix86_make_regparmandstackparmee (tree *pt) +{ + decl_attributes (pt, + tree_cons (get_identifier ("regparmandstackparmee"), + NULL_TREE, TYPE_ATTRIBUTES (*pt)), 0); +} + +/* Lookup fndecls marked 'regparmandstackparm', retrieve their $3SSE equivalents. */ +static splay_tree ix86_darwin_regparmandstackparm_st; +/* Cache for regparmandstackparm fntypes. */ +static splay_tree ix86_darwin_fntype_st; + +/* Append "$3SSE" to an ID, returning a new IDENTIFIER_NODE. 
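+ E.g. the identifier "foo" becomes "foo$3SSE".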
*/ +static tree +ix86_darwin_regparmandstackparm_mangle_name (tree id) +{ + static const char *mangle_suffix = "$3SSE"; + unsigned int mangle_length = strlen (mangle_suffix); + const char *name; + unsigned int orig_length; + char *buf; + + if (!id) + return NULL_TREE; + + name = IDENTIFIER_POINTER (id); + orig_length = strlen (name); + buf = alloca (orig_length + mangle_length + 1); + + strcpy (buf, name); + strcat (buf, mangle_suffix); + return get_identifier (buf); /* Expecting get_identifier to reallocate the string. */ +} + +/* Given the "normal" TRAD_FNDECL marked with 'regparmandstackparm', + return a duplicate fndecl marked 'regparmandstackparmee' (note trailing + 'ee'). Enter them as a pair in the splay tree ST, if non-null; + looking up the TRAD_FNDECL will return the new one. */ +static tree +ix86_darwin_regparmandstackparm_dup_fndecl (tree trad_fndecl, splay_tree st) +{ + tree fntype; + tree new_fndecl; + + fntype = TREE_TYPE (trad_fndecl); + + /* NEW_FNDECL will be compiled with the XMM-based calling + convention, and TRAD_FNDECL (the original) will be compiled with + the traditional stack-based calling convention. */ + new_fndecl = copy_node (trad_fndecl); + DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0; + allocate_struct_function (new_fndecl); + DECL_STRUCT_FUNCTION (new_fndecl)->function_end_locus + = DECL_STRUCT_FUNCTION (trad_fndecl)->function_end_locus; + DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = + DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl; + DECL_RESULT (new_fndecl) = copy_node (DECL_RESULT (trad_fndecl)); + DECL_CONTEXT (DECL_RESULT (new_fndecl)) = new_fndecl; + SET_DECL_ASSEMBLER_NAME (new_fndecl, 0); + DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl)); + TYPE_ATTRIBUTES (TREE_TYPE (new_fndecl)) + = copy_list (TYPE_ATTRIBUTES (TREE_TYPE (trad_fndecl))); + ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl)); + /* Kludge: block copied from tree-inline.c(save_body). Should + be refactored into a common shareable routine. */ + { + tree *parg; + + for (parg = &DECL_ARGUMENTS (new_fndecl); + *parg; + parg = &TREE_CHAIN (*parg)) + { + tree new = copy_node (*parg); + + lang_hooks.dup_lang_specific_decl (new); + DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (*parg); + DECL_CONTEXT (new) = new_fndecl; + /* Note: it may be possible to move the original parameters + with the function body, making this splay tree + unnecessary. */ + if (st) + splay_tree_insert (st, (splay_tree_key) *parg, (splay_tree_value) new); + TREE_CHAIN (new) = TREE_CHAIN (*parg); + *parg = new; + } + + if (DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl) + { + tree old = DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl; + tree new = copy_node (old); + + lang_hooks.dup_lang_specific_decl (new); + DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (old); + DECL_CONTEXT (new) = new_fndecl; + if (st) + splay_tree_insert (st, (splay_tree_key) old, (splay_tree_value) new); + TREE_CHAIN (new) = TREE_CHAIN (old); + DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = new; + } + + if (st) + splay_tree_insert (st, (splay_tree_key) DECL_RESULT (trad_fndecl), + (splay_tree_value) DECL_RESULT (new_fndecl)); + } +#if 0 + /* Testing Kludge: If TREE_READONLY is set, cgen can and + occasionally will delete "pure" (no side-effect) calls to a + library function. Cleared here to preclude this when + test-building libraries. 
*/ + TREE_READONLY (new_fndecl) = false; +#endif + + return new_fndecl; +} + +/* FNDECL has no body, but user has marked it as a regparmandstackparm + item. Create a corresponding regparmandstackparm decl for it, and + arrange for calls to be redirected to the regparmandstackparm + version. */ +static tree +ix86_darwin_regparmandstackparm_extern_decl (tree trad_fndecl) +{ + tree new_fndecl; + + /* new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, (splay_tree)0); */ + new_fndecl = copy_node (trad_fndecl); + DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl)); + DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0; + SET_DECL_ASSEMBLER_NAME (new_fndecl, 0); + ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl)); + cgraph_finalize_function (new_fndecl, /* nested = */ true); + if (!ix86_darwin_regparmandstackparm_st) + ix86_darwin_regparmandstackparm_st + = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); + splay_tree_insert (ix86_darwin_regparmandstackparm_st, + (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl); + return new_fndecl; +} + +/* Invoked after all functions have been seen and digested, but before + any inlining decisions have been made. Walk the callgraph, seeking + calls to functions that have regparmandstackparm variants. Rewrite the + calls, directing them to the new 'regparmandstackparmee' versions. */ +void +ix86_darwin_redirect_calls(void) +{ + struct cgraph_node *fastcall_node, *node; + struct cgraph_edge *edge, *next_edge; + tree addr, fastcall_decl, orig_fntype; + splay_tree_node call_stn, type_stn; + + if (!flag_unit_at_a_time) + return; + + if (!ix86_darwin_fntype_st) + ix86_darwin_fntype_st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); + + if (!ix86_darwin_regparmandstackparm_st) + ix86_darwin_regparmandstackparm_st + = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); + + /* Extern decls marked "regparmandstackparm" beget regparmandstackparmee + decls. */ + for (node = cgraph_nodes; node; node = node->next) + if (!DECL_SAVED_TREE (node->decl) + && lookup_attribute ("regparmandstackparm", + TYPE_ATTRIBUTES (TREE_TYPE (node->decl))) + && !lookup_attribute ("regparmandstackparmee", + TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))) + { + fastcall_decl = ix86_darwin_regparmandstackparm_extern_decl (node->decl); + splay_tree_insert (ix86_darwin_regparmandstackparm_st, + (splay_tree_key) node->decl, + (splay_tree_value) fastcall_decl); + } + + /* Walk the callgraph, rewriting calls as we go. */ + for (node = cgraph_nodes; node; node = node->next) + { + call_stn = splay_tree_lookup (ix86_darwin_regparmandstackparm_st, + (splay_tree_key)node->decl); + /* If this function was in our splay-tree, we previously created + a regparmandstackparm version of it. */ + if (call_stn) + { + fastcall_decl = (tree)call_stn->value; + fastcall_node = cgraph_node (fastcall_decl); + /* Redirect all calls to this fn to the regparmandstackparm + version. */ + for (edge = next_edge = node->callers ; edge ; edge = next_edge) + { + tree call, stmt; + next_edge = next_edge->next_caller; + cgraph_redirect_edge_callee (edge, fastcall_node); + /* APPLE LOCAL */ + /* MERGE FIXME call_expr -> call_stmt */ + stmt = edge->call_stmt; + call = get_call_expr_in (stmt); + addr = TREE_OPERAND (call, 0); + TREE_OPERAND (addr, 0) = fastcall_decl; + orig_fntype = TREE_TYPE (addr); + /* Likewise, revise the TYPE of the ADDR node between + the CALL_EXPR and the FNDECL. 
This type determines + the parameters and calling convention applied to this + CALL_EXPR. */ + type_stn = splay_tree_lookup (ix86_darwin_fntype_st, (splay_tree_value)orig_fntype); + if (type_stn) + TREE_TYPE (addr) = (tree)type_stn->value; + else + { + ix86_make_regparmandstackparmee (&TREE_TYPE (addr)); + splay_tree_insert (ix86_darwin_fntype_st, + (splay_tree_key)orig_fntype, + (splay_tree_value)TREE_TYPE (addr)); + } + } + } + } +} + +/* Information necessary to re-context a function body. */ +typedef struct { + tree old_context; + tree new_context; + splay_tree decl_map; +} recontext_data; + +/* Visit every node of a function body; if it points at the + OLD_CONTEXT, re-direct it to the NEW_CONTEXT. Invoked via + walk_tree. DECL_MAP is a splay tree that maps the original + parameters to new ones. */ +static tree +ix86_darwin_re_context_1 (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data ATTRIBUTE_UNUSED) +{ + tree t; + recontext_data *rcd; + enum tree_code_class class; + splay_tree_node n; + + if (!tp) + return NULL_TREE; + + t = *tp; + if (!t) + return NULL_TREE; + + rcd = (recontext_data *)data; + n = splay_tree_lookup (rcd->decl_map, (splay_tree_key) t); + if (n) + { + *tp = (tree)n->value; + return NULL_TREE; + } + + class = TREE_CODE_CLASS (TREE_CODE (t)); + if (class != tcc_declaration) + return NULL_TREE; + + if (DECL_CONTEXT (t) == rcd->old_context) + DECL_CONTEXT (t) = rcd->new_context; + + return NULL_TREE; +} + +/* Walk a function body, updating every pointer to OLD_CONTEXT to + NEW_CONTEXT. TP is the top of the function body, and ST is a splay + tree of replacements for the parameters. */ +static tree +ix86_darwin_re_context (tree *tp, tree old_context, tree new_context, splay_tree st) +{ + recontext_data rcd; + tree ret; + + rcd.old_context = old_context; + rcd.new_context = new_context; + rcd.decl_map = st; + + ret = walk_tree (tp, ix86_darwin_re_context_1, + (void *)&rcd, (struct pointer_set_t *)0); + return ret; +} + +/* Given TRAD_FNDECL, create a regparmandstackparm variant and hang the + DECL_SAVED_TREE body there. Create a new, one-statement body for + TRAD_FNDECL that calls the new one. If the return types are + compatible (e.g. non-FP), the call can usually be sibcalled. The + inliner will often copy the body from NEW_FNDECL into TRAD_FNDECL, + and we do nothing to prevent this. */ +static void +ix86_darwin_regparmandstackparm_wrapper (tree trad_fndecl) +{ + tree new_fndecl; + splay_tree st; + tree bind, block, call, clone_parm, modify, parmlist, rdecl, rtn, stmt_list, type; + tree_stmt_iterator tsi; + + st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); + new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, st); + + for (parmlist = NULL, clone_parm = DECL_ARGUMENTS (trad_fndecl); + clone_parm; + clone_parm = TREE_CHAIN (clone_parm)) + { + gcc_assert (clone_parm); + DECL_ABSTRACT_ORIGIN (clone_parm) = NULL; + parmlist = tree_cons (NULL, clone_parm, parmlist); + } + + /* We built this list backwards; fix now. */ + parmlist = nreverse (parmlist); + type = TREE_TYPE (TREE_TYPE (trad_fndecl)); + call = build_function_call (new_fndecl, parmlist); + TREE_TYPE (call) = type; + if (type == void_type_node) + rtn = call; + else if (0 && ix86_return_in_memory (type)) + { + /* Return without a RESULT_DECL: RETURN_EXPR (CALL). */ + rtn = make_node (RETURN_EXPR); + TREE_OPERAND (rtn, 0) = call; + TREE_TYPE (rtn) = type; + } + else /* RETURN_EXPR(MODIFY(RESULT_DECL, CALL)). 
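+ That is, the generated body behaves like "result = foo$3SSE (args); return result;" for a non-void return type (the name "foo" here is illustrative).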
*/ + { + rdecl = make_node (RESULT_DECL); + TREE_TYPE (rdecl) = type; + DECL_MODE (rdecl) = TYPE_MODE (type); + DECL_RESULT (trad_fndecl) = rdecl; + DECL_CONTEXT (rdecl) = trad_fndecl; + modify = build_modify_expr (rdecl, NOP_EXPR, call); + TREE_TYPE (modify) = type; + rtn = make_node (RETURN_EXPR); + TREE_OPERAND (rtn, 0) = modify; + TREE_TYPE (rtn) = type; + } + stmt_list = alloc_stmt_list (); + tsi = tsi_start (stmt_list); + tsi_link_after (&tsi, rtn, TSI_NEW_STMT); + + /* This wrapper consists of "return <my_name>$3SSE (<my_arguments>);" + thus it has no local variables. */ + block = make_node (BLOCK); + TREE_USED (block) = true; + bind = make_node (BIND_EXPR); + BIND_EXPR_BLOCK (bind) = block; + BIND_EXPR_BODY (bind) = stmt_list; + TREE_TYPE (bind) = void_type_node; + TREE_SIDE_EFFECTS (bind) = true; + + DECL_SAVED_TREE (trad_fndecl) = bind; + + /* DECL_ABSTRACT_ORIGIN (new_fndecl) = NULL; *//* ? */ + + ix86_darwin_re_context (&new_fndecl, trad_fndecl, new_fndecl, st); + ix86_darwin_re_context (&DECL_SAVED_TREE (new_fndecl), trad_fndecl, new_fndecl, st); + splay_tree_delete (st); + gimplify_function_tree (new_fndecl); + cgraph_finalize_function (new_fndecl, /* nested = */ true); + gimplify_function_tree (trad_fndecl); + if (!ix86_darwin_regparmandstackparm_st) + ix86_darwin_regparmandstackparm_st + = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); + splay_tree_insert (ix86_darwin_regparmandstackparm_st, + (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl); +} + +/* Entry point into the regparmandstackparm stuff. FNDECL might be marked + 'regparmandstackparm'; if it is, create the fast version, etc. */ +void +ix86_darwin_handle_regparmandstackparm (tree fndecl) +{ + static unsigned int already_running = 0; + + /* We don't support variable-argument functions yet. */ + if (!fndecl || already_running) + return; + + already_running++; + + if (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (TREE_TYPE (fndecl))) + && !lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + { + if (DECL_STRUCT_FUNCTION (fndecl) && DECL_STRUCT_FUNCTION (fndecl)->stdarg) + error ("regparmandstackparm is incompatible with varargs"); + else if (DECL_SAVED_TREE (fndecl)) + ix86_darwin_regparmandstackparm_wrapper (fndecl); + } + + already_running--; +} +/* APPLE LOCAL end regparmandstackparm */ + +/* APPLE LOCAL begin CW asm blocks */ +#include <ctype.h> +#include "config/asm.h" + +/* Additional register names accepted for inline assembly that would + otherwise not be registers. This table must be sorted for + bsearch. */ +static const char *iasm_additional_names[] = { + "AH", "AL", "AX", "BH", "BL", "BP", "BX", "CH", "CL", "CX", "DH", + "DI", "DL", "DX", "EAX", "EBP", "EBX", "ECX", "EDI", "EDX", "ESI", + "ESP", "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "R10", + "R11", "R12", "R13", "R14", "R15", "R8", "R9", "RAX", "RBP", "RBX", + "RCX", "RDI", "RDX", "RSI", "RSP", "SI", "SP", "ST", "ST(1)", "ST(2)", + "ST(3)", "ST(4)", "ST(5)", "ST(6)", "ST(7)", "XMM0", "XMM1", "XMM10", + "XMM11", "XMM12", "XMM13", "XMM14", "XMM15", "XMM2", "XMM3", "XMM4", + "XMM5", "XMM6", "XMM7", "XMM8", "XMM9" }; + +/* Comparison function for bsearch to find additional register names. */ +static int +iasm_reg_comp (const void *a, const void *b) +{ + char *const*x = a; + char *const*y = b; + int c = strcasecmp (*x, *y); + return c; +} + +/* Translate some register names seen in CW asm into GCC standard + forms.
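+ For example, "EAX" is folded to lower case and, for AT&T output, given a "%" prefix: "%eax".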
*/ + +const char * +i386_iasm_register_name (const char *regname, char *buf) +{ + const char **r; + + /* If we can find the named register, return it. */ + if (decode_reg_name (regname) >= 0) + { + if (ASSEMBLER_DIALECT == ASM_INTEL) + return regname; + sprintf (buf, "%%%s", regname); + return buf; + } + + /* If we can find a lower case version of any registers in + additional_names, return it. */ + r = bsearch (®name, iasm_additional_names, + sizeof (iasm_additional_names) / sizeof (iasm_additional_names[0]), + sizeof (iasm_additional_names[0]), iasm_reg_comp); + if (r) + { + char *p; + const char *q; + q = regname = *r; + p = buf; + if (ASSEMBLER_DIALECT != ASM_INTEL) + *p++ = '%'; + regname = p; + while ((*p++ = tolower (*q++))) + ; + if (decode_reg_name (regname) >= 0) + return buf; + } + + return NULL; +} + +/* Return true iff the opcode wants memory to be stable. We arrange + for a memory clobber in these instances. */ +bool +iasm_memory_clobber (const char *ARG_UNUSED (opcode)) +{ + return true; +} + +/* Return true iff the operands need swapping. */ + +bool +iasm_x86_needs_swapping (const char *opcode) +{ + /* Don't swap if output format is the same as input format. */ + if (ASSEMBLER_DIALECT == ASM_INTEL) + return false; + + /* These don't need swapping. */ + if (strcasecmp (opcode, "bound") == 0) + return false; + if (strcasecmp (opcode, "invlpga") == 0) + return false; + if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1)) + return false; + + return true; +} + +/* Swap operands, given in MS-style asm ordering when the output style + is in ATT syntax. */ + +static tree +iasm_x86_swap_operands (const char *opcode, tree args) +{ + int noperands; + + if (iasm_x86_needs_swapping (opcode) == false) + return args; + +#if 0 + /* GAS also checks the type of the arguments to determine if they + need swapping. */ + if ((argtype[0]&Imm) && (argtype[1]&Imm)) + return args; +#endif + noperands = list_length (args); + if (noperands == 2 || noperands == 3) + { + /* Swap first and last (1 and 2 or 1 and 3). */ + return nreverse (args); + } + return args; +} + +/* Map a register name to a high level tree type for a VAR_DECL of + that type, whose RTL will refer to the given register. */ + +static tree +iasm_type_for (tree arg) +{ + tree type = NULL_TREE; + + if (IDENTIFIER_LENGTH (arg) > 2 + && IDENTIFIER_POINTER (arg)[0] == '%') + { + enum machine_mode mode = VOIDmode; + if (IDENTIFIER_POINTER (arg)[1] == 'e') + mode = SImode; + else if (/* IDENTIFIER_POINTER (arg)[2] == 'h' + || */ IDENTIFIER_POINTER (arg)[2] == 'l') + mode = QImode; + else if (IDENTIFIER_POINTER (arg)[2] == 'x') + mode = HImode; + else if (IDENTIFIER_POINTER (arg)[1] == 'r') + mode = DImode; + else if (IDENTIFIER_POINTER (arg)[1] == 'x') + mode = SFmode; + else if (IDENTIFIER_POINTER (arg)[1] == 'm') + mode = SFmode; + + if (mode != VOIDmode) + type = lang_hooks.types.type_for_mode (mode, 1); + } + + return type; +} + +/* We raise the code from a named register into a VAR_DECL of an + appropriate type that refers to the register so that reload doesn't + run out of registers. 
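+ The effect is much like a user-level GNU C explicit-register declaration such as "register int r asm ("eax");".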
*/ + +tree +iasm_raise_reg (tree arg) +{ + int regno = decode_reg_name (IDENTIFIER_POINTER (arg)); + if (regno >= 0) + { + tree decl = NULL_TREE; + + decl = lookup_name (arg); + if (decl == error_mark_node) + decl = 0; + if (decl == 0) + { + tree type = iasm_type_for (arg); + if (type) + { + decl = build_decl (VAR_DECL, arg, type); + DECL_ARTIFICIAL (decl) = 1; + DECL_REGISTER (decl) = 1; + C_DECL_REGISTER (decl) = 1; + DECL_HARD_REGISTER (decl) = 1; + set_user_assembler_name (decl, IDENTIFIER_POINTER (arg)); + decl = lang_hooks.decls.pushdecl (decl); + } + } + + if (decl) + return decl; + } + + return arg; +} + +/* Allow constants and readonly variables to be used in instructions + in places that require constants. */ + +static tree +iasm_default_conv (tree e) +{ + if (e == NULL_TREE) + return e; + + if (TREE_CODE (e) == CONST_DECL) + e = DECL_INITIAL (e); + + if (DECL_P (e) && DECL_MODE (e) != BLKmode) + e = decl_constant_value (e); + return e; +} + +/* Return true iff the operand is suitable as the offset for a + memory instruction. */ + +static bool +iasm_is_offset (tree v) +{ + if (TREE_CODE (v) == INTEGER_CST) + return true; + if (TREE_CODE (v) == ADDR_EXPR) + { + v = TREE_OPERAND (v, 0); + if (TREE_CODE (v) == VAR_DECL + && TREE_STATIC (v) + && MEM_P (DECL_RTL (v))) + { + note_alternative_entry_points (); + return true; + } + if (TREE_CODE (v) == LABEL_DECL) + return true; + return false; + } + if (TREE_CODE (v) == VAR_DECL + && TREE_STATIC (v) + && MEM_P (DECL_RTL (v))) + { + note_alternative_entry_points (); + return true; + } + if ((TREE_CODE (v) == MINUS_EXPR + || TREE_CODE (v) == PLUS_EXPR) + && iasm_is_offset (TREE_OPERAND (v, 0)) + && iasm_is_offset (TREE_OPERAND (v, 1))) + return true; + if (TREE_CODE (v) == NEGATE_EXPR + && iasm_is_offset (TREE_OPERAND (v, 0))) + return true; + + return false; +} + +/* Combine two types for [] expressions. */ + +static tree +iasm_combine_type (tree type0, tree type1) +{ + if (type0 == void_type_node + || type0 == NULL_TREE) + { + if (type1 == void_type_node) + return NULL_TREE; + return type1; + } + + if (type1 == void_type_node + || type1 == NULL_TREE) + return type0; + + if (type0 == type1) + return type0; + + error ("too many types in []"); + + return type0; +} + +/* We canonicalize the input form of bracket expressions, as the input + forms are less constrained than what the assembler will accept. + + TOP is the top of the canonical tree we're generating and + TREE_OPERAND (, 0) is the offset portion of the expression. ARGP + points to the current part of the tree we're walking. + + The transformations we do: + + (A+O) ==> A + (A-O) ==> A + (O+A) ==> A + + where O are offset expressions.
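+ For example, in "[12 + eax]" the constant 12 migrates into the offset slot and "eax" remains as the base, so the AT&T output can print it as 12(%eax).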
*/ + +static tree +iasm_canonicalize_bracket_1 (tree* argp, tree top) +{ + tree arg = *argp; + tree offset = TREE_OPERAND (top, 0); + tree arg0, arg1; + tree rtype = NULL_TREE; + + *argp = arg = iasm_default_conv (arg); + + switch (TREE_CODE (arg)) + { + case NOP_EXPR: + if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE) + { + *argp = TREE_OPERAND (arg, 0); + return TREE_TYPE (arg); + } + break; + + case BRACKET_EXPR: + rtype = TREE_TYPE (arg); + /* fall thru */ + case PLUS_EXPR: + arg0 = TREE_OPERAND (arg, 0); + arg1 = TREE_OPERAND (arg, 1); + + arg0 = iasm_default_conv (arg0); + arg1 = iasm_default_conv (arg1); + + if (iasm_is_offset (arg0)) + { + if (offset != integer_zero_node) + arg0 = build2 (PLUS_EXPR, void_type_node, arg0, offset); + TREE_OPERAND (top, 0) = arg0; + + *argp = arg1; + if (arg1) + return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top)); + } + else if (arg1 && iasm_is_offset (arg1)) + { + if (offset != integer_zero_node) + arg1 = build2 (PLUS_EXPR, void_type_node, arg1, offset); + TREE_OPERAND (top, 0) = arg1; + *argp = arg0; + return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top)); + } + else + { + rtype = iasm_combine_type (rtype, + iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top)); + + if (arg1) + rtype = iasm_combine_type (rtype, + iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), top)); + if (TREE_OPERAND (arg, 0) == NULL_TREE) + { + if (TREE_OPERAND (arg, 1)) + { + TREE_OPERAND (arg, 0) = TREE_OPERAND (arg, 1); + TREE_OPERAND (arg, 1) = NULL_TREE; + } + else + *argp = NULL_TREE; + } + else if (TREE_OPERAND (arg, 1) == NULL_TREE && rtype == NULL_TREE) + *argp = TREE_OPERAND (arg, 0); + if (TREE_CODE (arg) == PLUS_EXPR + && TREE_TYPE (arg) == NULL_TREE + && TREE_TYPE (TREE_OPERAND (arg, 0)) + && TREE_TYPE (TREE_OPERAND (arg, 1)) + && (POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1))) + || POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0))))) + { + tree type = TREE_TYPE (TREE_OPERAND (arg, 1)); + if (INTEGRAL_TYPE_P (type)) + type = TREE_TYPE (TREE_OPERAND (arg, 0)); + TREE_TYPE (arg) = type; + } + if (TREE_CODE (arg) == PLUS_EXPR + && TREE_TYPE (arg) == NULL_TREE + && TREE_TYPE (TREE_OPERAND (arg, 0)) + && TREE_TYPE (TREE_OPERAND (arg, 0)) == TREE_TYPE (TREE_OPERAND (arg, 1))) + { + tree type = TREE_TYPE (TREE_OPERAND (arg, 0)); + TREE_TYPE (arg) = type; + } + } + return rtype; + + case MINUS_EXPR: + rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top); + arg0 = TREE_OPERAND (arg, 0); + arg1 = TREE_OPERAND (arg, 1); + arg1 = iasm_default_conv (arg1); + if (iasm_is_offset (arg1)) + { + offset = TREE_OPERAND (top, 0); + if (offset == integer_zero_node) + arg1 = fold (build1 (NEGATE_EXPR, + TREE_TYPE (arg1), + arg1)); + else + arg1 = build2 (MINUS_EXPR, void_type_node, offset, arg1); + TREE_OPERAND (top, 0) = arg1; + *argp = arg0; + return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));; + } + return rtype; + + case PARM_DECL: + case VAR_DECL: + { + *argp = iasm_addr (arg); + break; + } + + case IDENTIFIER_NODE: + { + *argp = iasm_raise_reg (arg); + break; + } + + case MULT_EXPR: + if (TREE_TYPE (arg) == NULL_TREE) + { + if (TREE_CODE (TREE_OPERAND (arg, 1)) == IDENTIFIER_NODE) + TREE_OPERAND (arg, 1) = iasm_raise_reg (TREE_OPERAND (arg, 1)); + if (TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE) + TREE_OPERAND (arg, 0) = iasm_raise_reg (TREE_OPERAND (arg, 0)); + if (TREE_TYPE (TREE_OPERAND (arg, 0)) + && TREE_TYPE (TREE_OPERAND (arg, 1))) + TREE_TYPE (arg) = TREE_TYPE 
(TREE_OPERAND (arg, 0)); + } + break; + + default: + break; + } + + return NULL_TREE; +} + +/* Form an indirection for an inline asm address expression operand. + We give a warning when we think the optimizer might have to be used + to reform complex addresses, &stack_var + %eax + 4 for example, + after gimplification rips the address apart. */ + +static tree +iasm_indirect (tree addr) +{ + if (TREE_CODE (addr) == ADDR_EXPR + && TREE_CODE (TREE_TYPE (TREE_OPERAND (addr, 0))) != ARRAY_TYPE + /* && TREE_CODE (TREE_OPERAND (addr, 0)) == ARRAY_REF */) + return TREE_OPERAND (addr, 0); + + addr = fold (build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr)); + + if (! optimize && TREE_CODE (addr) == INDIRECT_REF) + warning (0, "addressing mode too complex when not optimizing, will consume extra register(s)"); + + return addr; +} + +/* Form an address addition for an inline asm address expression. We + try to form ARRAY_REFs, as they will go through gimplification + without being ripped apart. */ + +static tree +iasm_add (tree addr, tree off) +{ + if (integer_zerop (off)) + return addr; + + /* We have to convert the offset to an int type, as we rip apart + trees whose type has been converted to a pointer type for the + offset already. */ + return pointer_int_sum (PLUS_EXPR, addr, convert (integer_type_node, off)); +} + +/* We canonicalize the input form of bracket expressions, as the input + forms are less constrained than what the assembler will accept. */ + +static tree +iasm_canonicalize_bracket (tree arg) +{ + tree rtype; + + gcc_assert (TREE_CODE (arg) == BRACKET_EXPR); + + /* Let the normal operand printer output this without trying to + decompose it into parts so that things like (%esp + 20) + 4 can + be output as 24(%esp) by the optimizer instead of 4(%0) and + burning an "R" with (%esp + 20). */ + if (TREE_OPERAND (arg, 1) == NULL_TREE + && TREE_TYPE (TREE_OPERAND (arg, 0)) + && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))) + { + if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL + || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL) + return arg; + return iasm_indirect (TREE_OPERAND (arg, 0)); + } + + /* Ensure that operand 0 is an offset. */ + if (TREE_OPERAND (arg, 0) + && iasm_is_offset (TREE_OPERAND (arg, 0))) + { + /* We win if operand 0 is an offset already. */ + } + else if (TREE_OPERAND (arg, 1) == NULL_TREE) + { + /* Move 0 to 1, if 1 is empty and 0 isn't already an offset. */ + TREE_OPERAND (arg, 1) = TREE_OPERAND (arg, 0); + TREE_OPERAND (arg, 0) = integer_zero_node; + } + else + { + tree swp; + /* Just have to force it now. */ + swp = iasm_build_bracket (TREE_OPERAND (arg, 0), TREE_OPERAND (arg, 1)); + TREE_OPERAND (arg, 0) = integer_zero_node; + TREE_OPERAND (arg, 1) = swp; + } + + if (TREE_OPERAND (arg, 1)) + { + rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), arg); + if (rtype) + TREE_TYPE (arg) = iasm_combine_type (TREE_TYPE (arg), rtype); + } + + /* For correctness, pointer types should be raised to the tree + level, as they denote address calculations with stack based + objects, and we want print_operand to print the entire address so + that it can combine constants and hard registers into the address. + Unfortunately we might have to rely upon the optimizer to reform + the address after the gimplification pass rips it apart.
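+ (iasm_indirect above already warns in the not-optimizing case, where no later pass will reassemble the address.)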
*/ + + /* Handle [INTEGER_CST][ptr][op3] */ + if (TREE_OPERAND (arg, 1) + && TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST + && TREE_CODE (TREE_OPERAND (arg, 1)) == BRACKET_EXPR + && TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)) + && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) + && TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) != void_type_node + && (TREE_TYPE (arg) == void_type_node + || (TREE_TYPE (arg) == get_identifier ("word") + && (TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)))) + == HImode)))) + { + tree op3 = TREE_OPERAND (TREE_OPERAND (arg, 1), 1); + tree addr = iasm_add (TREE_OPERAND (TREE_OPERAND (arg, 1), 0), + TREE_OPERAND (arg, 0)); + tree type; + addr = iasm_indirect (addr); + if (op3 == NULL_TREE) + return addr; + type = TREE_TYPE (addr); + type = build_pointer_type (type); + addr = build1 (ADDR_EXPR, type, addr); + addr = fold (build2 (PLUS_EXPR, type, addr, op3)); + return iasm_indirect (addr); + } + + /* Handle ptr + INTEGER_CST */ + if (TREE_OPERAND (arg, 1) + && TREE_TYPE (arg) == void_type_node + && TREE_TYPE (TREE_OPERAND (arg, 1)) + && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1))) + && TREE_TYPE (TREE_TYPE (TREE_OPERAND (arg, 1))) != void_type_node) + { + if (TREE_CODE (TREE_OPERAND (arg, 1)) == ADDR_EXPR) + { + if (TREE_OPERAND (arg, 0) == integer_zero_node) + return TREE_OPERAND (TREE_OPERAND (arg, 1), 0); + if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST) + return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0))); + } + if (TREE_CODE (TREE_OPERAND (arg, 1)) == PLUS_EXPR) + { + if (TREE_OPERAND (arg, 0) == integer_zero_node) + return iasm_indirect (TREE_OPERAND (arg, 1)); + if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST) + return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0))); + } + } + return arg; +} + +/* We canonicalize the instruction by swapping operands and rewriting + the opcode if the output style is in ATT syntax. */ + +tree +iasm_x86_canonicalize_operands (const char **opcode_p, tree iargs, void *ep) +{ + iasm_md_extra_info *e = ep; + static char buf[40]; + tree args = iargs; + int argnum = 1; + const char *opcode = *opcode_p; + bool fp_style = false; + bool fpi_style = false; + + /* Don't transform if output format is the same as input format. */ + if (ASSEMBLER_DIALECT == ASM_INTEL) + return iargs; + + if (strncasecmp (opcode, "f", 1) == 0) + fp_style = true; + + if (fp_style + && strncasecmp (opcode+1, "i", 1) == 0) + fpi_style = true; + + while (args) + { + tree arg = TREE_VALUE (args); + + /* Handle st(3) */ + if (TREE_CODE (arg) == COMPOUND_EXPR + && TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE + && strcasecmp (IDENTIFIER_POINTER (TREE_OPERAND (arg, 0)), "%st") == 0 + && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST) + { + int v = tree_low_cst (TREE_OPERAND (arg, 1), 0); + + if (v < 0 || v > 7) + { + error ("unknown floating point register st(%d)", v); + v = 0; + } + + /* Rewrite %st(0) to %st.
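+ AT&T syntax writes the x87 stack top as bare %st, so "st(0)" prints as "%st" while "st(2)" prints as "%st(2)".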
*/ + if (v == 0) + TREE_VALUE (args) = TREE_OPERAND (arg, 0); + else + { + char buf[20]; + sprintf (buf, "%%st(%d)", v); + TREE_VALUE (args) = get_identifier (buf); + } + } + else if (TREE_CODE (arg) == BRACKET_EXPR) + TREE_VALUE (args) = arg = iasm_canonicalize_bracket (arg); + + switch (TREE_CODE (arg)) + { + case ARRAY_REF: + case VAR_DECL: + case PARM_DECL: + case INDIRECT_REF: + if (TYPE_MODE (TREE_TYPE (arg)) == QImode) + e->mod[argnum-1] = 'b'; + else if (TYPE_MODE (TREE_TYPE (arg)) == HImode) + e->mod[argnum-1] = fpi_style ? 's' : 'w'; + else if (TYPE_MODE (TREE_TYPE (arg)) == SImode) + e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l'); + else if (TYPE_MODE (TREE_TYPE (arg)) == DImode) + e->mod[argnum-1] = 'q'; + else if (TYPE_MODE (TREE_TYPE (arg)) == SFmode) + e->mod[argnum-1] = 's'; + else if (TYPE_MODE (TREE_TYPE (arg)) == DFmode) + e->mod[argnum-1] = 'l'; + else if (TYPE_MODE (TREE_TYPE (arg)) == XFmode) + e->mod[argnum-1] = 't'; + break; + case BRACKET_EXPR: + /* We use the TREE_TYPE to indicate the type of operand; it + is set with code like: inc dword ptr [eax]. */ + if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE) + { + const char *s = IDENTIFIER_POINTER (TREE_TYPE (arg)); + if (strcasecmp (s, "byte") == 0) + e->mod[argnum-1] = 'b'; + else if (strcasecmp (s, "word") == 0) + e->mod[argnum-1] = fpi_style ? 's' : 'w'; + else if (strcasecmp (s, "dword") == 0) + e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l'); + else if (strcasecmp (s, "qword") == 0) + e->mod[argnum-1] = 'q'; + else if (strcasecmp (s, "real4") == 0) + e->mod[argnum-1] = 's'; + else if (strcasecmp (s, "real8") == 0) + e->mod[argnum-1] = 'l'; + else if (strcasecmp (s, "real10") == 0) + e->mod[argnum-1] = 't'; + else if (strcasecmp (s, "tbyte") == 0) + e->mod[argnum-1] = 't'; + } + break; + case LABEL_DECL: + e->mod[argnum-1] = 'l'; + break; + case IDENTIFIER_NODE: + if (IDENTIFIER_LENGTH (arg) > 2 + && IDENTIFIER_POINTER (arg)[0] == '%') + { + if (IDENTIFIER_POINTER (arg)[1] == 'e') + e->mod[argnum-1] = 'l'; + else if (IDENTIFIER_POINTER (arg)[2] == 'h' + || IDENTIFIER_POINTER (arg)[2] == 'l') + e->mod[argnum-1] = 'b'; + else if (IDENTIFIER_POINTER (arg)[2] == 'x') + e->mod[argnum-1] = 'w'; + } + break; + default: + break; + } + args = TREE_CHAIN (args); + ++argnum; + } + --argnum; + + args = iasm_x86_swap_operands (opcode, iargs); + if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1)) + e->pseudo = true; + + if (strcasecmp (opcode, "movs") == 0 + || strcasecmp (opcode, "scas") == 0 + || strcasecmp (opcode, "stos") == 0 + || strcasecmp (opcode, "xlat") == 0) + args = NULL_TREE; + else if (strcasecmp (opcode, "cmovpo") == 0) + opcode = "cmovnp"; + else if (strcasecmp (opcode, "cmovpe") == 0) + opcode = "cmovp"; + else if (strcasecmp (opcode, "outs") == 0 + && TREE_CHAIN (args)) + { + e->mod[0] = e->mod[1]; + } + else if (strcasecmp (opcode, "ins") == 0 + && TREE_CHAIN (args)) + { + e->mod[1] = 0; + } + /* movsx isn't part of the AT&T syntax; they spell it movs. */ + else if (strcasecmp (opcode, "movsx") == 0) + opcode = "movs"; + else if (strcasecmp (opcode, "pushfd") == 0) + *opcode_p = "pushf"; + else if (strcasecmp (opcode, "popfd") == 0) + *opcode_p = "popf"; + + /* movzx isn't part of the AT&T syntax; they spell it movz. */ + if (strcasecmp (opcode, "movzx") == 0) + { + /* Silly extension of the day: a zero-extended move that has the + same before and after size is accepted and is just a normal + move.
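+ For example, "movzx eax, ebx" has 32-bit operands on both sides and is emitted as a plain "movl".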
*/ + if (argnum == 2 + && (e->mod[0] == e->mod[1] + || e->mod[1] == 0)) + opcode = "mov"; + else + opcode = "movz"; + } + + if (strncasecmp (opcode, "f", 1) == 0 && + (!(strcasecmp (opcode, "fldcw") == 0))) + { + if (e->mod[0] == 'w') + e->mod[0] = 's'; + if (e->mod[1] == 'w') + e->mod[1] = 's'; + } + else if (strcasecmp (opcode, "mov") == 0) + { + /* The 32-bit integer instructions can be used on floats. */ + if (e->mod[0] == 's') + e->mod[0] = 'l'; + if (e->mod[1] == 's') + e->mod[1] = 'l'; + } + + if (e->pseudo) + e->mod[0] = e->mod[1] = 0; + else if (strcasecmp (opcode, "clflush") == 0 + || strcasecmp (opcode, "fbld") == 0 + || strcasecmp (opcode, "fbstp") == 0 + || strcasecmp (opcode, "fldt") == 0 + || strcasecmp (opcode, "fnstcw") == 0 + || strcasecmp (opcode, "fnstsw") == 0 + || strcasecmp (opcode, "fstcw") == 0 + || strcasecmp (opcode, "fstsw") == 0 + || strcasecmp (opcode, "fxrstor") == 0 + || strcasecmp (opcode, "fxsave") == 0 + || strcasecmp (opcode, "invlpg") == 0 + || strcasecmp (opcode, "jmp") == 0 + || strcasecmp (opcode, "call") == 0 + || strcasecmp (opcode, "ja") == 0 + || strcasecmp (opcode, "jae") == 0 + || strcasecmp (opcode, "jb") == 0 + || strcasecmp (opcode, "jbe") == 0 + || strcasecmp (opcode, "jc") == 0 + || strcasecmp (opcode, "je") == 0 + || strcasecmp (opcode, "jg") == 0 + || strcasecmp (opcode, "jge") == 0 + || strcasecmp (opcode, "jl") == 0 + || strcasecmp (opcode, "jle") == 0 + || strcasecmp (opcode, "jna") == 0 + || strcasecmp (opcode, "jnae") == 0 + || strcasecmp (opcode, "jnb") == 0 + || strcasecmp (opcode, "jnc") == 0 + || strcasecmp (opcode, "jne") == 0 + || strcasecmp (opcode, "jng") == 0 + || strcasecmp (opcode, "jnge") == 0 + || strcasecmp (opcode, "jnl") == 0 + || strcasecmp (opcode, "jnle") == 0 + || strcasecmp (opcode, "jno") == 0 + || strcasecmp (opcode, "jnp") == 0 + || strcasecmp (opcode, "jns") == 0 + || strcasecmp (opcode, "jnz") == 0 + || strcasecmp (opcode, "jo") == 0 + || strcasecmp (opcode, "jp") == 0 + || strcasecmp (opcode, "jpe") == 0 + || strcasecmp (opcode, "jpo") == 0 + || strcasecmp (opcode, "js") == 0 + || strcasecmp (opcode, "jz") == 0 + || strcasecmp (opcode, "ldmxcsr") == 0 + || strcasecmp (opcode, "lgdt") == 0 + || strcasecmp (opcode, "lidt") == 0 + || strcasecmp (opcode, "lldt") == 0 + || strcasecmp (opcode, "lmsw") == 0 + || strcasecmp (opcode, "ltr") == 0 + || strcasecmp (opcode, "movapd") == 0 + || strcasecmp (opcode, "movaps") == 0 + || strcasecmp (opcode, "movd") == 0 + || strcasecmp (opcode, "movhpd") == 0 + || strcasecmp (opcode, "movhps") == 0 + || strcasecmp (opcode, "movlpd") == 0 + || strcasecmp (opcode, "movlps") == 0 + || strcasecmp (opcode, "movntdq") == 0 + || strcasecmp (opcode, "movntpd") == 0 + || strcasecmp (opcode, "movntps") == 0 + || strcasecmp (opcode, "movntq") == 0 + || strcasecmp (opcode, "movq") == 0 + || strcasecmp (opcode, "movsd") == 0 + || strcasecmp (opcode, "movss") == 0 + || strcasecmp (opcode, "movupd") == 0 + || strcasecmp (opcode, "movups") == 0 + || strcasecmp (opcode, "out") == 0 + || strcasecmp (opcode, "prefetchnta") == 0 + || strcasecmp (opcode, "prefetcht0") == 0 + || strcasecmp (opcode, "prefetcht1") == 0 + || strcasecmp (opcode, "prefetcht2") == 0 + || strcasecmp (opcode, "seta") == 0 + || strcasecmp (opcode, "setae") == 0 + || strcasecmp (opcode, "setb") == 0 + || strcasecmp (opcode, "setbe") == 0 + || strcasecmp (opcode, "setc") == 0 + || strcasecmp (opcode, "sete") == 0 + || strcasecmp (opcode, "setg") == 0 + || strcasecmp (opcode, "setge") == 0 + || strcasecmp (opcode, "setl") 
== 0 + || strcasecmp (opcode, "setle") == 0 + || strcasecmp (opcode, "setna") == 0 + || strcasecmp (opcode, "setnae") == 0 + || strcasecmp (opcode, "setnb") == 0 + || strcasecmp (opcode, "setnbe") == 0 + || strcasecmp (opcode, "setnc") == 0 + || strcasecmp (opcode, "setne") == 0 + || strcasecmp (opcode, "setng") == 0 + || strcasecmp (opcode, "setnge") == 0 + || strcasecmp (opcode, "setnl") == 0 + || strcasecmp (opcode, "setnle") == 0 + || strcasecmp (opcode, "setno") == 0 + || strcasecmp (opcode, "setnp") == 0 + || strcasecmp (opcode, "setns") == 0 + || strcasecmp (opcode, "setnz") == 0 + || strcasecmp (opcode, "seto") == 0 + || strcasecmp (opcode, "setp") == 0 + || strcasecmp (opcode, "setpe") == 0 + || strcasecmp (opcode, "setpo") == 0 + || strcasecmp (opcode, "sets") == 0 + || strcasecmp (opcode, "setz") == 0 + || strcasecmp (opcode, "sldt") == 0 + || strcasecmp (opcode, "smsw") == 0 + || strcasecmp (opcode, "stmxcsr") == 0 + || strcasecmp (opcode, "str") == 0 + || strcasecmp (opcode, "xlat") == 0) + e->mod[0] = 0; + else if (strcasecmp (opcode, "lea") == 0 + || strcasecmp (opcode, "rcl") == 0 + || strcasecmp (opcode, "rcr") == 0 + || strcasecmp (opcode, "rol") == 0 + || strcasecmp (opcode, "ror") == 0 + || strcasecmp (opcode, "sal") == 0 + || strcasecmp (opcode, "sar") == 0 + || strcasecmp (opcode, "shl") == 0 + || strcasecmp (opcode, "shr") == 0) + e->mod[1] = 0; + + if ((argnum == 1 && e->mod[0]) + || (argnum == 2 && e->mod[0] + && (e->mod[0] == e->mod[1] + || e->mod[1] == 0))) + { + sprintf (buf, "%s%c", opcode, e->mod[0]); + *opcode_p = buf; + } + else if (argnum == 2 && e->mod[0] && e->mod[1]) + { + sprintf (buf, "%s%c%c", opcode, e->mod[1], e->mod[0]); + *opcode_p = buf; + } + + return args; +} + +/* Character used to separate the prefix words. */ +/* See radr://4141844 for the enhancement to make this uniformly ' '. */ +#define IASM_PREFIX_SEP '/' + +void +iasm_x86_print_prefix (char *buf, tree prefix_list) +{ + buf += strlen (buf); + while (prefix_list) + { + tree prefix = TREE_VALUE (prefix_list); + size_t len = IDENTIFIER_LENGTH (prefix); + memcpy (buf, IDENTIFIER_POINTER (prefix), len); + buf += len; + buf[0] = IASM_PREFIX_SEP; + ++buf; + buf[0] = 0; + prefix_list = TREE_CHAIN (prefix_list); + } +} + +/* Warn when a variable's address is used to form a memory address when + that address will use an extra register during reload. */ + +static void +iasm_warn_extra_reg (tree arg) +{ + if (TREE_CODE (arg) == ADDR_EXPR + && (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL + || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL)) + warning (0, "addressing mode too complex, will consume an extra register"); +} + +bool +iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses, + bool must_be_reg, bool must_not_be_reg, void *ep) +{ + iasm_md_extra_info *e = ep; + switch (TREE_CODE (arg)) + { + case BRACKET_EXPR: + { + tree op1 = TREE_OPERAND (arg, 0); + tree op2 = TREE_OPERAND (arg, 1); + tree op0 = NULL_TREE, op3 = NULL_TREE; + tree scale = NULL_TREE; + + if (op2 == NULL_TREE + && TREE_TYPE (op1) + && POINTER_TYPE_P (TREE_TYPE (op1))) + { + /* Let the normal operand printer output this without trying to + decompose it into parts so that things like (%esp + 20) + 4 + can be output as 24(%esp) by the optimizer instead of 4(%0) + and burning an "R" with (%esp + 20).
*/ + iasm_force_constraint ("m", e); + iasm_get_register_var (op1, "", buf, argnum, must_be_reg, e); + iasm_force_constraint (0, e); + break; + } + + if (op2 + && TREE_CODE (op2) == BRACKET_EXPR) + { + op3 = TREE_OPERAND (op2, 1); + op2 = TREE_OPERAND (op2, 0); + if (TREE_CODE (op2) == BRACKET_EXPR) + { + op0 = TREE_OPERAND (op2, 1); + op2 = TREE_OPERAND (op2, 0); + } + } + if (op0) + return false; + + if (ASSEMBLER_DIALECT == ASM_INTEL) + strcat (buf, "["); + + if (op3 == NULL_TREE + && op2 && TREE_CODE (op2) == PLUS_EXPR) + { + op3 = TREE_OPERAND (op2, 0); + op2 = TREE_OPERAND (op2, 1); + } + if (op2 && TREE_CODE (op2) == MULT_EXPR) + { + tree t; + t = op3; + op3 = op2; + op2 = t; + } + + /* Crack out the scaling, if any. */ + if (ASSEMBLER_DIALECT == ASM_ATT + && op3 + && TREE_CODE (op3) == MULT_EXPR) + { + if (TREE_CODE (TREE_OPERAND (op3, 1)) == INTEGER_CST) + { + scale = TREE_OPERAND (op3, 1); + op3 = TREE_OPERAND (op3, 0); + } + else if (TREE_CODE (TREE_OPERAND (op3, 0)) == INTEGER_CST) + { + scale = TREE_OPERAND (op3, 0); + op3 = TREE_OPERAND (op3, 1); + } + } + + /* Complicated expression as JMP or CALL target. */ + if (e->modifier && strcmp(e->modifier, "A") == 0) + { + strcat (buf, "*"); + e->modifier = 0; + } + e->as_immediate = true; + iasm_print_operand (buf, op1, argnum, uses, + must_be_reg, must_not_be_reg, e); + e->as_immediate = false; + + /* Just an immediate. */ + if (op2 == NULL_TREE && op3 == NULL_TREE) + break; + + if (ASSEMBLER_DIALECT == ASM_INTEL) + strcat (buf, "]"); + if (ASSEMBLER_DIALECT == ASM_INTEL) + strcat (buf, "["); + else + strcat (buf, "("); + + if (op2) + { + /* We know by context, this has to be an R. */ + iasm_force_constraint ("R", e); + iasm_warn_extra_reg (op2); + iasm_print_operand (buf, op2, argnum, uses, + must_be_reg, must_not_be_reg, e); + iasm_force_constraint (0, e); + } + if (op3) + { + if (ASSEMBLER_DIALECT == ASM_INTEL) + strcat (buf, "]["); + else + strcat (buf, ","); + + /* We know by context, this has to be an l. */ + iasm_force_constraint ("l", e); + iasm_warn_extra_reg (op3); + iasm_print_operand (buf, op3, argnum, uses, + must_be_reg, must_not_be_reg, e); + iasm_force_constraint (0, e); + if (scale) + { + strcat (buf, ","); + e->as_immediate = true; + iasm_print_operand (buf, scale, argnum, uses, + must_be_reg, must_not_be_reg, e); + e->as_immediate = false; + } + } + if (ASSEMBLER_DIALECT == ASM_INTEL) + strcat (buf, "]"); + else + strcat (buf, ")"); + } + break; + + case ADDR_EXPR: + if ((TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF + || TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL) + && ! e->as_immediate) + { + iasm_get_register_var (arg, "", buf, argnum, must_be_reg, e); + break; + } + if (! e->as_immediate) + e->as_offset = true; + iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses, + must_be_reg, must_not_be_reg, e); + e->as_offset = false; + break; + + case MULT_EXPR: + iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses, + must_be_reg, must_not_be_reg, e); + strcat (buf, "*"); + iasm_print_operand (buf, TREE_OPERAND (arg, 1), argnum, uses, + must_be_reg, must_not_be_reg, e); + break; + default: + return false; + } + return true; +} +/* APPLE LOCAL end CW asm blocks */ + +/* Return the mangling of TYPE if it is an extended fundamental type. 
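+ (Per the Itanium C++ ABI: "g" for __float128, "e" for the 80-bit long double.)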
*/ + +static const char * +/* APPLE LOCAL mangle_type 7105099 */ +ix86_mangle_type (tree type) +{ + /* APPLE LOCAL begin mangle_type 7105099 */ + type = TYPE_MAIN_VARIANT (type); + + if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE + && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) + return NULL; + + /* APPLE LOCAL end mangle_type 7105099 */ + switch (TYPE_MODE (type)) + { + case TFmode: + /* __float128 is "g". */ + return "g"; + case XFmode: + /* "long double" or __float80 is "e". */ + return "e"; + default: + return NULL; + } +} + +/* For 32-bit code we can save PIC register setup by using + __stack_chk_fail_local hidden function instead of calling + __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC + register, so it is better to call __stack_chk_fail directly. */ + +static tree +ix86_stack_protect_fail (void) +{ + return TARGET_64BIT + ? default_external_stack_protect_fail () + : default_hidden_stack_protect_fail (); +} + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + ??? All x86 object file formats are capable of representing this. + After all, the relocation needed is the same as for the call insn. + Whether or not a particular assembler allows us to enter such, I + guess we'll have to see. */ +int +asm_preferred_eh_data_format (int code, int global) +{ + if (flag_pic) + { + int type = DW_EH_PE_sdata8; + if (!TARGET_64BIT + || ix86_cmodel == CM_SMALL_PIC + || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) + type = DW_EH_PE_sdata4; + return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; + } + if (ix86_cmodel == CM_SMALL + || (ix86_cmodel == CM_MEDIUM && code)) + return DW_EH_PE_udata4; + return DW_EH_PE_absptr; +} + +#include "gt-i386.h" diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.h b/gcc-4.2.1-5666.3/gcc/config/i386/i386.h new file mode 100644 index 000000000..df7703b36 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.h @@ -0,0 +1,3230 @@ +/* Definitions of target machine for GCC for IA-32. + Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* The purpose of this file is to define the characteristics of the i386, + independent of assembler syntax or operating system. + + Three other files build on this one to describe a specific assembler syntax: + bsd386.h, att386.h, and sun386.h. + + The actual tm.h file for a particular system should include + this file, and then the file for the appropriate assembler syntax. 
+ + Many macros that specify assembler syntax are omitted entirely from + this file because they really belong in the files for particular + assemblers. These include RP, IP, LPREFIX, PUT_OP_SIZE, USE_STAR, + ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many + that start with ASM_ or end in ASM_OP. */ + +/* Define the specific costs for a given cpu */ + +struct processor_costs { + const int add; /* cost of an add instruction */ + const int lea; /* cost of a lea instruction */ + const int shift_var; /* variable shift costs */ + const int shift_const; /* constant shift costs */ + const int mult_init[5]; /* cost of starting a multiply + in QImode, HImode, SImode, DImode, TImode*/ + const int mult_bit; /* cost of multiply per each bit set */ + const int divide[5]; /* cost of a divide/mod + in QImode, HImode, SImode, DImode, TImode*/ + int movsx; /* The cost of movsx operation. */ + int movzx; /* The cost of movzx operation. */ + const int large_insn; /* insns larger than this cost more */ + const int move_ratio; /* The threshold of number of scalar + memory-to-memory move insns. */ + const int movzbl_load; /* cost of loading using movzbl */ + const int int_load[3]; /* cost of loading integer registers + in QImode, HImode and SImode relative + to reg-reg move (2). */ + const int int_store[3]; /* cost of storing integer register + in QImode, HImode and SImode */ + const int fp_move; /* cost of reg,reg fld/fst */ + const int fp_load[3]; /* cost of loading FP register + in SFmode, DFmode and XFmode */ + const int fp_store[3]; /* cost of storing FP register + in SFmode, DFmode and XFmode */ + const int mmx_move; /* cost of moving MMX register. */ + const int mmx_load[2]; /* cost of loading MMX register + in SImode and DImode */ + const int mmx_store[2]; /* cost of storing MMX register + in SImode and DImode */ + const int sse_move; /* cost of moving SSE register. */ + const int sse_load[3]; /* cost of loading SSE register + in SImode, DImode and TImode*/ + const int sse_store[3]; /* cost of storing SSE register + in SImode, DImode and TImode*/ + const int mmxsse_to_integer; /* cost of moving mmxsse register to + integer and vice versa. */ + const int prefetch_block; /* bytes moved to cache for prefetch. */ + const int simultaneous_prefetches; /* number of parallel prefetch + operations. */ + const int branch_cost; /* Default value for BRANCH_COST. */ + const int fadd; /* cost of FADD and FSUB instructions. */ + const int fmul; /* cost of FMUL instruction. */ + const int fdiv; /* cost of FDIV instruction. */ + const int fabs; /* cost of FABS instruction. */ + const int fchs; /* cost of FCHS instruction. */ + const int fsqrt; /* cost of FSQRT instruction. */ +}; + +extern const struct processor_costs *ix86_cost; + +/* Macros used in the machine description to test the flags. */ + +/* configure can arrange to make this 2, to force a 486. */ + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_generic +#endif + +#ifndef TARGET_FPMATH_DEFAULT +#define TARGET_FPMATH_DEFAULT \ + (TARGET_64BIT && TARGET_SSE ? FPMATH_SSE : FPMATH_387) +#endif + +#define TARGET_FLOAT_RETURNS_IN_80387 TARGET_FLOAT_RETURNS +/* APPLE LOCAL begin AT&T-style stub 4164563 */ +#define MACHOPIC_NL_SYMBOL_PTR_SECTION ".section __IMPORT,__pointers,non_lazy_symbol_pointers" +/* APPLE LOCAL end AT&T-style stub 4164563 */ + +/* 64bit Sledgehammer mode. For libgcc2 we make sure this is a + compile-time constant. 
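+ (Inside libgcc2 the definition comes from the __x86_64__ preprocessor macro rather than from any command-line flag.)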
*/ +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __x86_64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#else +#ifndef TARGET_BI_ARCH +#undef TARGET_64BIT +#if TARGET_64BIT_DEFAULT +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif +#endif + +#define HAS_LONG_COND_BRANCH 1 +#define HAS_LONG_UNCOND_BRANCH 1 + +#define TARGET_386 (ix86_tune == PROCESSOR_I386) +#define TARGET_486 (ix86_tune == PROCESSOR_I486) +#define TARGET_PENTIUM (ix86_tune == PROCESSOR_PENTIUM) +#define TARGET_PENTIUMPRO (ix86_tune == PROCESSOR_PENTIUMPRO) +#define TARGET_K6 (ix86_tune == PROCESSOR_K6) +#define TARGET_ATHLON (ix86_tune == PROCESSOR_ATHLON) +#define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4) +#define TARGET_K8 (ix86_tune == PROCESSOR_K8) +#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON) +#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA) +/* APPLE LOCAL mainline */ +#define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2) +#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32) +#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) +#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) + +#define TUNEMASK (1 << ix86_tune) +extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; +extern const int x86_use_bit_test, x86_cmove, x86_deep_branch; +extern const int x86_branch_hints, x86_unroll_strlen; +extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx; +extern const int x86_use_himode_fiop, x86_use_simode_fiop; +extern const int x86_use_mov0, x86_use_cltd, x86_read_modify_write; +extern const int x86_read_modify, x86_split_long_moves; +extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix; +extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs; +extern const int x86_promote_hi_regs, x86_integer_DFmode_moves; +extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; +extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall; +extern const int x86_accumulate_outgoing_args, x86_prologue_using_move; +extern const int x86_epilogue_using_move, x86_decompose_lea; +extern const int x86_arch_always_fancy_math_387, x86_shift1; +extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs; +extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor; +extern const int x86_use_ffreep; +extern const int x86_inter_unit_moves, x86_schedule; +extern const int x86_use_bt; +/* APPLE LOCAL override options */ +extern int x86_cmpxchg, x86_cmpxchg8b, x86_cmpxchg16b, x86_xadd; +extern const int x86_use_incdec; +extern const int x86_pad_returns; +/* APPLE LOCAL mainline bswap/local override options */ +extern int x86_bswap; +extern const int x86_partial_flag_reg_stall; +extern int x86_prefetch_sse; + +#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK) +#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK) +#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & TUNEMASK) +#define TARGET_USE_BIT_TEST (x86_use_bit_test & TUNEMASK) +#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & TUNEMASK) +/* For sane SSE instruction set generation we need fcomi instruction. It is + safe to enable all CMOVE instructions. 
*/ +#define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE) +#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) +#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & TUNEMASK) +#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & TUNEMASK) +#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & TUNEMASK) +#define TARGET_USE_SAHF ((x86_use_sahf & TUNEMASK) && !TARGET_64BIT) +#define TARGET_MOVX (x86_movx & TUNEMASK) +#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & TUNEMASK) +#define TARGET_PARTIAL_FLAG_REG_STALL (x86_partial_flag_reg_stall & TUNEMASK) +#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & TUNEMASK) +#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & TUNEMASK) +#define TARGET_USE_MOV0 (x86_use_mov0 & TUNEMASK) +#define TARGET_USE_CLTD (x86_use_cltd & TUNEMASK) +#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & TUNEMASK) +#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & TUNEMASK) +#define TARGET_READ_MODIFY (x86_read_modify & TUNEMASK) +#define TARGET_PROMOTE_QImode (x86_promote_QImode & TUNEMASK) +#define TARGET_FAST_PREFIX (x86_fast_prefix & TUNEMASK) +#define TARGET_SINGLE_STRINGOP (x86_single_stringop & TUNEMASK) +#define TARGET_QIMODE_MATH (x86_qimode_math & TUNEMASK) +#define TARGET_HIMODE_MATH (x86_himode_math & TUNEMASK) +#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & TUNEMASK) +#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & TUNEMASK) +#define TARGET_ADD_ESP_4 (x86_add_esp_4 & TUNEMASK) +#define TARGET_ADD_ESP_8 (x86_add_esp_8 & TUNEMASK) +#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & TUNEMASK) +#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & TUNEMASK) +#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & TUNEMASK) +#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK) +#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \ + (x86_sse_partial_reg_dependency & TUNEMASK) +#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK) +#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK) +#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK) +#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & TUNEMASK) +#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK) +#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK) +#define TARGET_PREFETCH_SSE (x86_prefetch_sse) +#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK) +#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK) +#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK) +#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK) +#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK) +#define TARGET_SCHEDULE (x86_schedule & TUNEMASK) +#define TARGET_USE_BT (x86_use_bt & TUNEMASK) +#define TARGET_USE_INCDEC (x86_use_incdec & TUNEMASK) +#define TARGET_PAD_RETURNS (x86_pad_returns & TUNEMASK) + +#define ASSEMBLER_DIALECT (ix86_asm_dialect) + +#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0) +#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \ + && (ix86_fpmath & FPMATH_387)) +/* APPLE LOCAL mainline */ +#define TARGET_SSSE3 ((target_flags & MASK_SSSE3) != 0) + +#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU) +#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2) +#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS) +#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN) + +#define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch)) +#define TARGET_CMPXCHG8B (x86_cmpxchg8b & (1 << ix86_arch)) 
+#define TARGET_CMPXCHG16B (x86_cmpxchg16b & (1 << ix86_arch)) +#define TARGET_XADD (x86_xadd & (1 << ix86_arch)) +/* APPLE LOCAL mainline bswap */ +#define TARGET_BSWAP (x86_bswap & (1 << ix86_arch)) + +#ifndef TARGET_64BIT_DEFAULT +#define TARGET_64BIT_DEFAULT 0 +#endif +#ifndef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT +#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0 +#endif + +/* Once GDB has been enhanced to deal with functions without frame + pointers, we can change this to allow for elimination of + the frame pointer in leaf functions. */ +#define TARGET_DEFAULT 0 +/* APPLE LOCAL begin mainline */ +/* Extra bits to force. */ +#define TARGET_SUBTARGET32_DEFAULT 0 + +#define TARGET_SUBTARGET64_DEFAULT 0 +/* APPLE LOCAL end mainline */ + +/* This is not really a target flag, but is done this way so that + it's analogous to similar code for Mach-O on PowerPC. darwin.h + redefines this to 1. */ +#define TARGET_MACHO 0 +/* APPLE LOCAL begin mach-o cleanup */ +#define MACHOPIC_INDIRECT 0 +#define MACHOPIC_PURE 0 +/* APPLE LOCAL end mach-o cleanup */ + +/* Subtargets may reset this to 1 in order to enable 96-bit long double + with the rounding mode forced to 53 bits. */ +#define TARGET_96_ROUND_53_LONG_DOUBLE 0 + +/* Sometimes certain combinations of command options do not make + sense on a particular target machine. You can define a macro + `OVERRIDE_OPTIONS' to take account of this. This macro, if + defined, is executed once just after all the command options have + been parsed. + + Don't use this macro to turn on various extra optimizations for + `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ + +#define OVERRIDE_OPTIONS override_options () + +/* Define this to change the optimizations performed by default. */ +#define OPTIMIZATION_OPTIONS(LEVEL, SIZE) \ + optimization_options ((LEVEL), (SIZE)) + +/* -march=native handling only makes sense with compiler running on + an x86 or x86_64 chip. If changing this condition, also change + the condition in driver-i386.c. */ +#if defined(__i386__) || defined(__x86_64__) +/* In driver-i386.c. */ +extern const char *host_detect_local_cpu (int argc, const char **argv); +#define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, +#define HAVE_LOCAL_CPU_DETECT +#endif + +/* Support for configure-time defaults of some command line options. + The order here is important so that -march doesn't squash the + tune or cpu values. */ +#define OPTION_DEFAULT_SPECS \ + {"tune", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \ + {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \ + {"arch", "%{!march=*:-march=%(VALUE)}"} + +/* Specs for the compiler proper */ + +#ifndef CC1_CPU_SPEC +#define CC1_CPU_SPEC_1 "\ +%{!mtune*: \ +%{m386:mtune=i386 \ +%n`-m386' is deprecated. Use `-march=i386' or `-mtune=i386' instead.\n} \ +%{m486:-mtune=i486 \ +%n`-m486' is deprecated. Use `-march=i486' or `-mtune=i486' instead.\n} \ +%{mpentium:-mtune=pentium \ +%n`-mpentium' is deprecated. Use `-march=pentium' or `-mtune=pentium' instead.\n} \ +%{mpentiumpro:-mtune=pentiumpro \ +%n`-mpentiumpro' is deprecated. Use `-march=pentiumpro' or `-mtune=pentiumpro' instead.\n} \ +%{mcpu=*:-mtune=%* \ +%n`-mcpu=' is deprecated. Use `-mtune=' or '-march=' instead.\n}} \ +%<mcpu=* \ +%{mintel-syntax:-masm=intel \ +%n`-mintel-syntax' is deprecated. Use `-masm=intel' instead.\n} \ +%{mno-intel-syntax:-masm=att \ +%n`-mno-intel-syntax' is deprecated. 
Use `-masm=att' instead.\n}" + +#ifndef HAVE_LOCAL_CPU_DETECT +#define CC1_CPU_SPEC CC1_CPU_SPEC_1 +#else +#define CC1_CPU_SPEC CC1_CPU_SPEC_1 \ +"%{march=native:%<march=native %:local_cpu_detect(arch) \ + %{!mtune=*:%<mtune=native %:local_cpu_detect(tune)}} \ +%{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" +#endif +#endif + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + size_t arch_len = strlen (ix86_arch_string); \ + size_t tune_len = strlen (ix86_tune_string); \ + int last_arch_char = ix86_arch_string[arch_len - 1]; \ + int last_tune_char = ix86_tune_string[tune_len - 1]; \ + \ + if (TARGET_64BIT) \ + { \ + builtin_assert ("cpu=x86_64"); \ + builtin_assert ("machine=x86_64"); \ + builtin_define ("__amd64"); \ + builtin_define ("__amd64__"); \ + builtin_define ("__x86_64"); \ + builtin_define ("__x86_64__"); \ + } \ + else \ + { \ + builtin_assert ("cpu=i386"); \ + builtin_assert ("machine=i386"); \ + builtin_define_std ("i386"); \ + } \ + \ + /* Built-ins based on -mtune= (or -march= if no \ + -mtune= given). */ \ + if (TARGET_386) \ + builtin_define ("__tune_i386__"); \ + else if (TARGET_486) \ + builtin_define ("__tune_i486__"); \ + else if (TARGET_PENTIUM) \ + { \ + builtin_define ("__tune_i586__"); \ + builtin_define ("__tune_pentium__"); \ + if (last_tune_char == 'x') \ + builtin_define ("__tune_pentium_mmx__"); \ + } \ + else if (TARGET_PENTIUMPRO) \ + { \ + builtin_define ("__tune_i686__"); \ + builtin_define ("__tune_pentiumpro__"); \ + switch (last_tune_char) \ + { \ + case '3': \ + builtin_define ("__tune_pentium3__"); \ + /* FALLTHRU */ \ + case '2': \ + builtin_define ("__tune_pentium2__"); \ + break; \ + } \ + } \ + else if (TARGET_K6) \ + { \ + builtin_define ("__tune_k6__"); \ + if (last_tune_char == '2') \ + builtin_define ("__tune_k6_2__"); \ + else if (last_tune_char == '3') \ + builtin_define ("__tune_k6_3__"); \ + } \ + else if (TARGET_ATHLON) \ + { \ + builtin_define ("__tune_athlon__"); \ + /* Only plain "athlon" lacks SSE. */ \ + if (last_tune_char != 'n') \ + builtin_define ("__tune_athlon_sse__"); \ + } \ + else if (TARGET_K8) \ + builtin_define ("__tune_k8__"); \ + else if (TARGET_PENTIUM4) \ + builtin_define ("__tune_pentium4__"); \ + else if (TARGET_NOCONA) \ + builtin_define ("__tune_nocona__"); \ + /* APPLE LOCAL begin mainline */ \ + else if (TARGET_CORE2) \ + builtin_define ("__tune_core2__"); \ + /* APPLE LOCAL end mainline */ \ + \ + if (TARGET_MMX) \ + builtin_define ("__MMX__"); \ + if (TARGET_3DNOW) \ + builtin_define ("__3dNOW__"); \ + if (TARGET_3DNOW_A) \ + builtin_define ("__3dNOW_A__"); \ + if (TARGET_SSE) \ + builtin_define ("__SSE__"); \ + if (TARGET_SSE2) \ + builtin_define ("__SSE2__"); \ + if (TARGET_SSE3) \ + builtin_define ("__SSE3__"); \ + /* APPLE LOCAL begin mainline */ \ + if (TARGET_SSSE3) \ + builtin_define ("__SSSE3__"); \ + /* APPLE LOCAL end mainline */ \ + /* APPLE LOCAL begin 5612787 mainline sse4 */ \ + if (TARGET_SSE4_1) \ + builtin_define ("__SSE4_1__"); \ + if (TARGET_SSE4_2) \ + builtin_define ("__SSE4_2__"); \ + if (TARGET_SSE4A) \ + builtin_define ("__SSE4A__"); \ + /* APPLE LOCAL end 5612787 mainline sse4 */ \ + if (TARGET_SSE_MATH && TARGET_SSE) \ + builtin_define ("__SSE_MATH__"); \ + if (TARGET_SSE_MATH && TARGET_SSE2) \ + builtin_define ("__SSE2_MATH__"); \ + \ + /* Built-ins based on -march=. 
*/ \ + if (ix86_arch == PROCESSOR_I486) \ + { \ + builtin_define ("__i486"); \ + builtin_define ("__i486__"); \ + } \ + else if (ix86_arch == PROCESSOR_PENTIUM) \ + { \ + builtin_define ("__i586"); \ + builtin_define ("__i586__"); \ + builtin_define ("__pentium"); \ + builtin_define ("__pentium__"); \ + if (last_arch_char == 'x') \ + builtin_define ("__pentium_mmx__"); \ + } \ + else if (ix86_arch == PROCESSOR_PENTIUMPRO) \ + { \ + builtin_define ("__i686"); \ + builtin_define ("__i686__"); \ + builtin_define ("__pentiumpro"); \ + builtin_define ("__pentiumpro__"); \ + } \ + else if (ix86_arch == PROCESSOR_K6) \ + { \ + \ + builtin_define ("__k6"); \ + builtin_define ("__k6__"); \ + if (last_arch_char == '2') \ + builtin_define ("__k6_2__"); \ + else if (last_arch_char == '3') \ + builtin_define ("__k6_3__"); \ + } \ + else if (ix86_arch == PROCESSOR_ATHLON) \ + { \ + builtin_define ("__athlon"); \ + builtin_define ("__athlon__"); \ + /* Only plain "athlon" lacks SSE. */ \ + if (last_arch_char != 'n') \ + builtin_define ("__athlon_sse__"); \ + } \ + else if (ix86_arch == PROCESSOR_K8) \ + { \ + builtin_define ("__k8"); \ + builtin_define ("__k8__"); \ + } \ + else if (ix86_arch == PROCESSOR_PENTIUM4) \ + { \ + builtin_define ("__pentium4"); \ + builtin_define ("__pentium4__"); \ + } \ + else if (ix86_arch == PROCESSOR_NOCONA) \ + { \ + builtin_define ("__nocona"); \ + builtin_define ("__nocona__"); \ + } \ + /* APPLE LOCAL begin mainline */ \ + else if (ix86_arch == PROCESSOR_CORE2) \ + { \ + builtin_define ("__core2"); \ + builtin_define ("__core2__"); \ + } \ + /* APPLE LOCAL end mainline */ \ + } \ + while (0) + +#define TARGET_CPU_DEFAULT_i386 0 +#define TARGET_CPU_DEFAULT_i486 1 +#define TARGET_CPU_DEFAULT_pentium 2 +#define TARGET_CPU_DEFAULT_pentium_mmx 3 +#define TARGET_CPU_DEFAULT_pentiumpro 4 +#define TARGET_CPU_DEFAULT_pentium2 5 +#define TARGET_CPU_DEFAULT_pentium3 6 +#define TARGET_CPU_DEFAULT_pentium4 7 +#define TARGET_CPU_DEFAULT_k6 8 +#define TARGET_CPU_DEFAULT_k6_2 9 +#define TARGET_CPU_DEFAULT_k6_3 10 +#define TARGET_CPU_DEFAULT_athlon 11 +#define TARGET_CPU_DEFAULT_athlon_sse 12 +#define TARGET_CPU_DEFAULT_k8 13 +#define TARGET_CPU_DEFAULT_pentium_m 14 +#define TARGET_CPU_DEFAULT_prescott 15 +#define TARGET_CPU_DEFAULT_nocona 16 +#define TARGET_CPU_DEFAULT_generic 17 +/* APPLE LOCAL mainline */ +#define TARGET_CPU_DEFAULT_core2 18 +/* APPLE LOCAL begin mainline */ +#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\ + "pentiumpro", "pentium2", "pentium3", \ + "pentium4", "k6", "k6-2", "k6-3",\ + "athlon", "athlon-4", "k8", \ + "pentium-m", "prescott", "nocona", \ + "generic", "core2" } +/* APPLE LOCAL end mainline */ + +#ifndef CC1_SPEC +#define CC1_SPEC "%(cc1_cpu) " +#endif + +/* This macro defines names of additional specifications to put in the + specs that can be used in various specifications like CC1_SPEC. Its + definition is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. */ + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#define EXTRA_SPECS \ + { "cc1_cpu", CC1_CPU_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +/* target machine storage layout */ + +#define LONG_DOUBLE_TYPE_SIZE 80 + +/* Set the value of FLT_EVAL_METHOD in float.h. 
When using only the
+ FPU, assume that the fpcw is set to extended precision; when using
+ only SSE, rounding is correct; when using both SSE and the FPU,
+ the rounding precision is indeterminate, since either may be chosen
+ apparently at random. */
+#define TARGET_FLT_EVAL_METHOD \
+ (TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 0 : 2)
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define FLOAT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE BITS_PER_WORD
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_LONG_TYPE_SIZE 64
+
+#if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT
+#define MAX_BITS_PER_WORD 64
+#else
+#define MAX_BITS_PER_WORD 32
+#endif
+
+/* Define this if most significant bit of a word is the lowest numbered. */
+/* That is not true on the 80386. */
+
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is not true on the 80386. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+/* Not true for 80386 */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#ifdef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#else
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+/* APPLE LOCAL begin compiler should obey -mpreferred-stack-boundary (radar 3232990) */
+/* prefer * #define STACK_BOUNDARY ((ix86_preferred_stack_boundary > 128) ? 128 : ix86_preferred_stack_boundary) */
+/* We're going to extremes to yield a result of indeterminate
+ signedness here; this macro will be expanded in signed and
+ unsigned contexts, and mixed signedness induces fatal
+ warnings. Radar 3941684. */
+#define STACK_BOUNDARY ((ix86_preferred_stack_boundary >= 128) ? 128 : \
+ (ix86_preferred_stack_boundary == 64) ? 64 : 32)
+/* APPLE LOCAL end compiler should obey -mpreferred-stack-boundary (radar 3232990) */
+
+/* Boundary (in *bits*) on which the stack pointer prefers to be
+ aligned; the compiler cannot rely on having this alignment. */
+#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
+
+/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
+#define SAVE_PREFERRED_STACK_BOUNDARY ix86_save_preferred_stack_boundary
+/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
+
+/* As of July 2001, many runtimes do not align the stack properly when
+ entering main. This causes expand_main_function to forcibly align
+ the stack, which results in aligned frames for functions called from
+ main, though it does nothing for the alignment of main itself. */
+#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
+ (ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
+
+/* Minimum allocation boundary for the code of a function. */
+#define FUNCTION_BOUNDARY 8
+
+/* C++ stores the virtual bit in the lowest bit of function pointers. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_pfn
+
+/* Alignment of field after `int : 0' in a structure. */
+
+#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD
+
+/* Minimum size in bits of the largest boundary to which any
+ and all fundamental data types supported by the hardware
+ might need to be aligned. No data type wants to be aligned
+ rounder than this.
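Stepping back to TARGET_FLT_EVAL_METHOD above: the regime the compiler picks surfaces as FLT_EVAL_METHOD in <float.h>, so a program can ask which one it was built under. A minimal sketch, using only standard C99 (nothing target-specific assumed):

#include <stdio.h>
#include <float.h>

int main (void)
{
  /*  2: x87 math, float/double evaluated in long double precision;
      0: SSE math, each operation rounds to its declared type;
     -1: mixed x87/SSE, precision indeterminate.  */
  printf ("FLT_EVAL_METHOD = %d\n", (int) FLT_EVAL_METHOD);
  return 0;
}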
+ + Pentium+ prefers DFmode values to be aligned to 64 bit boundary + and Pentium Pro XFmode values at 128 bit boundaries. */ + +#define BIGGEST_ALIGNMENT 128 + +/* Decide whether a variable of mode MODE should be 128 bit aligned. */ +#define ALIGN_MODE_128(MODE) \ + ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) + +/* The published ABIs say that doubles should be aligned on word + boundaries, so lower the alignment for structure fields unless + -malign-double is set. */ + +/* ??? Blah -- this macro is used directly by libobjc. Since it + supports no vector modes, cut out the complexity and fall back + on BIGGEST_FIELD_ALIGNMENT. */ +#ifdef IN_TARGET_LIBS +#ifdef __x86_64__ +#define BIGGEST_FIELD_ALIGNMENT 128 +#else +#define BIGGEST_FIELD_ALIGNMENT 32 +#endif +#else +#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ + x86_field_alignment (FIELD, COMPUTED) +#endif + +/* If defined, a C expression to compute the alignment given to a + constant that is being placed in memory. EXP is the constant + and ALIGN is the alignment that the object would ordinarily have. + The value of this macro is used instead of that alignment to align + the object. + + If this macro is not defined, then ALIGN is used. + + The typical use of this macro is to increase alignment for string + constants to be word aligned so that `strcpy' calls that copy + constants can be done inline. */ + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) ix86_constant_alignment ((EXP), (ALIGN)) + +/* If defined, a C expression to compute the alignment for a static + variable. TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. + + If this macro is not defined, then ALIGN is used. + + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. Another is to + cause character arrays to be word-aligned so that `strcpy' calls + that copy constants to character arrays can be done inline. */ + +#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN)) + +/* If defined, a C expression to compute the alignment for a local + variable. TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. + + If this macro is not defined, then ALIGN is used. + + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. */ + +#define LOCAL_ALIGNMENT(TYPE, ALIGN) ix86_local_alignment ((TYPE), (ALIGN)) + +/* If defined, a C expression that gives the alignment boundary, in + bits, of an argument with the specified mode and type. If it is + not defined, `PARM_BOUNDARY' is used for all arguments. */ + +#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \ + ix86_function_arg_boundary ((MODE), (TYPE)) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 0 + +/* If bit field type is int, don't let it cross an int, + and give entire struct the alignment of an int. */ +/* Required on the 386 since it doesn't have bit-field insns. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Standard register usage. */ + +/* This processor has special stack-like registers. See reg-stack.c + for details. 
*/
+
+#define STACK_REGS
+#define IS_STACK_MODE(MODE) \
+ (((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH)) \
+ || ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \
+ || (MODE) == XFmode)
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ In the 80386 we give the 8 general purpose registers the numbers 0-7.
+ We number the floating point registers 8-15.
+ Note that registers 0-7 can be accessed as a short or int,
+ while only 0-3 may be used with byte `mov' instructions.
+
+ Reg 16 does not correspond to any hardware register, but instead
+ appears in the RTL as an argument pointer prior to reload, and is
+ eliminated during reloading in favor of either the stack or frame
+ pointer. */
+
+#define FIRST_PSEUDO_REGISTER 53
+
+/* Number of hardware registers that go into the DWARF-2 unwind info.
+ If not defined, equals FIRST_PSEUDO_REGISTER. */
+
+#define DWARF_FRAME_REGISTERS 17
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+ On the 80386, the stack pointer is such, as is the arg pointer.
+
+ The value is zero if the register is not fixed on either 32- or
+ 64-bit targets, one if the register is fixed on both 32- and 64-bit
+ targets, two if it is fixed only on 32-bit targets, and three
+ if it is fixed only on 64-bit targets.
+ Proper values are computed in CONDITIONAL_REGISTER_USAGE.
+ */
+#define FIXED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
+/*arg,flags,fpsr,dir,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2}
+
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like.
+
+ The value is zero if the register is not call used on either 32- or
+ 64-bit targets, one if the register is call used on both 32- and 64-bit
+ targets, two if it is call used only on 32-bit targets, and three
+ if it is call used only on 64-bit targets.
+ Proper values are computed in CONDITIONAL_REGISTER_USAGE.
+*/
+#define CALL_USED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 1, 1, 1, 0, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*arg,flags,fpsr,dir,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 1, 1, 1, 1, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1}
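The 0/1/2/3 encoding in the two tables above is collapsed to a plain 0/1 at run time by CONDITIONAL_REGISTER_USAGE, defined a few lines below. A stand-alone sketch of that collapse, with a four-entry toy table standing in for the real 53-register one:

#include <stdio.h>

int main (void)
{
  /* 0 = never fixed, 1 = always, 2 = 32-bit only, 3 = 64-bit only.  */
  int fixed[4] = { 0, 1, 2, 3 };
  int target_64bit = 1;                  /* pretend TARGET_64BIT */
  for (int i = 0; i < 4; i++)
    if (fixed[i] > 1)
      fixed[i] = (fixed[i] == (target_64bit ? 3 : 2));
  for (int i = 0; i < 4; i++)
    printf ("%d%c", fixed[i], i == 3 ? '\n' : ' ');   /* prints: 0 1 0 1 */
  return 0;
}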
/* Order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. List frame pointer
+ late and fixed registers last. Note that, in general, we prefer
+ registers listed in CALL_USED_REGISTERS, keeping the others
+ available for storage of persistent values.
+
+ ORDER_REGS_FOR_LOCAL_ALLOC actually overwrites this order,
+ so this is just an empty initializer for the array. */
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52 }
+
+/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
+ to be rearranged based on a particular function. When using SSE math,
+ we want to allocate SSE before x87 registers and vice versa. */
+
+#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
+
+
+/* Macro to conditionally modify fixed_regs/call_used_regs. */
+#define CONDITIONAL_REGISTER_USAGE \
+do { \
+ int i; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ { \
+ if (fixed_regs[i] > 1) \
+ fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2)); \
+ if (call_used_regs[i] > 1) \
+ call_used_regs[i] = (call_used_regs[i] \
+ == (TARGET_64BIT ? 3 : 2)); \
+ } \
+ if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) \
+ { \
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
+ } \
+ if (! TARGET_MMX) \
+ { \
+ int i; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ } \
+ if (! TARGET_SSE) \
+ { \
+ int i; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ } \
+ if (! TARGET_80387 && ! TARGET_FLOAT_RETURNS_IN_80387) \
+ { \
+ int i; \
+ HARD_REG_SET x; \
+ COPY_HARD_REG_SET (x, reg_class_contents[(int)FLOAT_REGS]); \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (x, i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ } \
+ if (! TARGET_64BIT) \
+ { \
+ int i; \
+ for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) \
+ reg_names[i] = ""; \
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) \
+ reg_names[i] = ""; \
+ } \
+ } while (0)
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ Actually there are no two word move instructions for consecutive
+ registers. And only registers 0-3 may have mov byte instructions
+ applied to them.
+ */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
+ : ((MODE) == XFmode \
+ ? (TARGET_64BIT ? 2 : 3) \
+ : (MODE) == XCmode \
+ ? (TARGET_64BIT ? 4 : 6) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
+
+#define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE) \
+ ((TARGET_128BIT_LONG_DOUBLE && !TARGET_64BIT) \
+ ? (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? 0 \
+ : ((MODE) == XFmode || (MODE) == XCmode)) \
+ : 0)
+
+#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ?
4 : 8) + +#define VALID_SSE2_REG_MODE(MODE) \ + ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ + || (MODE) == V2DImode || (MODE) == DFmode) + +#define VALID_SSE_REG_MODE(MODE) \ + ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \ + || (MODE) == SFmode || (MODE) == TFmode) + +#define VALID_MMX_REG_MODE_3DNOW(MODE) \ + ((MODE) == V2SFmode || (MODE) == SFmode) + +#define VALID_MMX_REG_MODE(MODE) \ + ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ +/* APPLE LOCAL 4656532 use V1DImode for _m64 */ \ + || (MODE) == V2SImode || (MODE) == SImode || (MODE) == V1DImode) + +/* ??? No autovectorization into MMX or 3DNOW until we can reliably + place emms and femms instructions. */ +#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD) + +#define VALID_FP_MODE_P(MODE) \ + ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ + || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \ + +#define VALID_INT_MODE_P(MODE) \ + ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode \ + || (MODE) == DImode \ + || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode \ + || (MODE) == CDImode \ + || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode \ + || (MODE) == TFmode || (MODE) == TCmode))) + +/* Return true for modes passed in SSE registers. */ +#define SSE_REG_MODE_P(MODE) \ + ((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \ + || (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \ + || (MODE) == V4SFmode || (MODE) == V4SImode) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + ix86_hard_regno_mode_ok ((REGNO), (MODE)) + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) ix86_modes_tieable_p (MODE1, MODE2) + +/* It is possible to write patterns to move flags; but until someone + does it, */ +#define AVOID_CCMODE_COPIES + +/* Specify the modes required to caller save a given hard regno. + We do this on i386 to prevent flags from being saved at all. + + Kill any attempts to combine saving of modes. */ + +#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + (CC_REGNO_P (REGNO) ? VOIDmode \ + : (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode \ + : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false)\ + : (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode \ + : (MODE) == QImode && (REGNO) >= 4 && !TARGET_64BIT ? SImode \ + : (MODE)) +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* on the 386 the pc register is %eip, and is not usable as a general + register. The ordinary mov instructions won't work */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 7 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 6 + +/* Base register for access to local variables of the function. 
*/ +#define FRAME_POINTER_REGNUM 20 + +/* First floating point reg */ +#define FIRST_FLOAT_REG 8 + +/* First & last stack-like regs */ +#define FIRST_STACK_REG FIRST_FLOAT_REG +#define LAST_STACK_REG (FIRST_FLOAT_REG + 7) + +#define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1) +#define LAST_SSE_REG (FIRST_SSE_REG + 7) + +#define FIRST_MMX_REG (LAST_SSE_REG + 1) +#define LAST_MMX_REG (FIRST_MMX_REG + 7) + +#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) +#define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7) + +#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) +#define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7) + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms + may be accessed via the stack pointer) in functions that seem suitable. + This is computed in `reload', in reload1.c. */ +#define FRAME_POINTER_REQUIRED ix86_frame_pointer_required () + +/* Override this in other tm.h files to cope with various OS lossage + requiring a frame pointer. */ +#ifndef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 +#endif + +/* Make sure we can access arbitrary call frames. */ +#define SETUP_FRAME_ADDRESSES() ix86_setup_frame_addresses () + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 16 + +/* Register in which static-chain is passed to a function. + We do use ECX as static chain register for 32 bit ABI. On the + 64bit ABI, ECX is an argument register, so we use R10 instead. */ +#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? FIRST_REX_INT_REG + 10 - 8 : 2) + +/* Register to hold the addressing base for position independent + code access to data items. We don't use PIC pointer for 64bit + mode. Define the regnum to dummy value to prevent gcc from + pessimizing code dealing with EBX. + + To avoid clobbering a call-saved register unnecessarily, we renumber + the pic register when possible. The change is visible after the + prologue has been emitted. */ + +#define REAL_PIC_OFFSET_TABLE_REGNUM 3 + +#define PIC_OFFSET_TABLE_REGNUM \ + /* APPLE LOCAL begin 5695218 */ \ + ((TARGET_64BIT && ix86_cmodel == CM_SMALL_PIC) \ + || !flag_pic ? INVALID_REGNUM \ + : reload_completed && pic_offset_table_rtx ? REGNO (pic_offset_table_rtx) \ + : REAL_PIC_OFFSET_TABLE_REGNUM) \ + /* APPLE LOCAL end 5695218 */ + +#define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_" + +/* A C expression which can inhibit the returning of certain function + values in registers, based on the type of value. A nonzero value + says to return the function value in memory, just as large + structures are always returned. Here TYPE will be a C expression + of type `tree', representing the data type of the value. + + Note that values of mode `BLKmode' must be explicitly handled by + this macro. Also, the option `-fpcc-struct-return' takes effect + regardless of this macro. On most systems, it is possible to + leave the macro undefined; this causes a default definition to be + used, whose value is the constant 1 for `BLKmode' values, and 0 + otherwise. + + Do not use this macro to indicate that structures and unions + should always be returned in memory. You should instead use + `DEFAULT_PCC_STRUCT_RETURN' to indicate this. */ + +#define RETURN_IN_MEMORY(TYPE) \ + ix86_return_in_memory (TYPE) + +/* APPLE LOCAL begin radar 4781080 */ +#define OBJC_FPRETURN_MSGCALL(TYPE,WHICH) \ + ix86_objc_fpreturn_msgcall (TYPE, WHICH) +/* APPLE LOCAL end radar 4781080 */ + +/* This is overridden by <cygwin.h>. 
*/ +#define MS_AGGREGATE_RETURN 0 + +/* This is overridden by <netware.h>. */ +#define KEEP_AGGREGATE_RETURN_POINTER 0 + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It might seem that class BREG is unnecessary, since no useful 386 + opcode needs reg %ebx. But some systems pass args to the OS in ebx, + and the "b" register constraint is useful in asms for syscalls. + + The flags and fpsr registers are in no class. */ + +enum reg_class +{ + NO_REGS, + AREG, DREG, CREG, BREG, SIREG, DIREG, + AD_REGS, /* %eax/%edx for DImode */ + Q_REGS, /* %eax %ebx %ecx %edx */ + NON_Q_REGS, /* %esi %edi %ebp %esp */ + INDEX_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp */ + LEGACY_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp */ + GENERAL_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp %r8 - %r15*/ + FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */ + FLOAT_REGS, + /* APPLE LOCAL 5612787 mainline sse4 */ + SSE_FIRST_REG, + SSE_REGS, + MMX_REGS, + FP_TOP_SSE_REGS, + FP_SECOND_SSE_REGS, + FLOAT_SSE_REGS, + FLOAT_INT_REGS, + INT_SSE_REGS, + FLOAT_INT_SSE_REGS, + ALL_REGS, LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +#define INTEGER_CLASS_P(CLASS) \ + reg_class_subset_p ((CLASS), GENERAL_REGS) +#define FLOAT_CLASS_P(CLASS) \ + reg_class_subset_p ((CLASS), FLOAT_REGS) +#define SSE_CLASS_P(CLASS) \ + ((CLASS) == SSE_REGS) +#define MMX_CLASS_P(CLASS) \ + ((CLASS) == MMX_REGS) +#define MAYBE_INTEGER_CLASS_P(CLASS) \ + reg_classes_intersect_p ((CLASS), GENERAL_REGS) +#define MAYBE_FLOAT_CLASS_P(CLASS) \ + reg_classes_intersect_p ((CLASS), FLOAT_REGS) +#define MAYBE_SSE_CLASS_P(CLASS) \ + reg_classes_intersect_p (SSE_REGS, (CLASS)) +#define MAYBE_MMX_CLASS_P(CLASS) \ + reg_classes_intersect_p (MMX_REGS, (CLASS)) + +#define Q_CLASS_P(CLASS) \ + reg_class_subset_p ((CLASS), Q_REGS) + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ +{ "NO_REGS", \ + "AREG", "DREG", "CREG", "BREG", \ + "SIREG", "DIREG", \ + "AD_REGS", \ + "Q_REGS", "NON_Q_REGS", \ + "INDEX_REGS", \ + "LEGACY_REGS", \ + "GENERAL_REGS", \ + "FP_TOP_REG", "FP_SECOND_REG", \ + "FLOAT_REGS", \ + /* APPLE LOCAL 5612787 mainline sse4 */ \ + "SSE_FIRST_REG", \ + "SSE_REGS", \ + "MMX_REGS", \ + "FP_TOP_SSE_REGS", \ + "FP_SECOND_SSE_REGS", \ + "FLOAT_SSE_REGS", \ + "FLOAT_INT_REGS", \ + "INT_SSE_REGS", \ + "FLOAT_INT_SSE_REGS", \ + "ALL_REGS" } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. 
*/ + +#define REG_CLASS_CONTENTS \ +{ { 0x00, 0x0 }, \ + { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \ + { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \ + { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \ + { 0x03, 0x0 }, /* AD_REGS */ \ + { 0x0f, 0x0 }, /* Q_REGS */ \ + { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \ + { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \ + { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \ + { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \ + { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\ + { 0xff00, 0x0 }, /* FLOAT_REGS */ \ +/* APPLE LOCAL 5612787 mainline sse4 */ \ + { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \ +{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \ +{ 0xe0000000, 0x1f }, /* MMX_REGS */ \ +{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \ +{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \ +{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \ + { 0x1ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \ +{ 0x1fe100ff,0x1fffe0 }, /* INT_SSE_REGS */ \ +{ 0x1fe1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \ +{ 0xffffffff,0x1fffff } \ +} + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO]) + +/* When defined, the compiler allows registers explicitly used in the + rtl to be used as spill registers but prevents the compiler from + extending the lifetime of these registers. */ + +#define SMALL_REGISTER_CLASSES 1 + +#define QI_REG_P(X) \ + (REG_P (X) && REGNO (X) < 4) + +#define GENERAL_REGNO_P(N) \ + ((N) < 8 || REX_INT_REGNO_P (N)) + +#define GENERAL_REG_P(X) \ + (REG_P (X) && GENERAL_REGNO_P (REGNO (X))) + +#define ANY_QI_REG_P(X) (TARGET_64BIT ? GENERAL_REG_P(X) : QI_REG_P (X)) + +#define NON_QI_REG_P(X) \ + (REG_P (X) && REGNO (X) >= 4 && REGNO (X) < FIRST_PSEUDO_REGISTER) + +#define REX_INT_REGNO_P(N) ((N) >= FIRST_REX_INT_REG && (N) <= LAST_REX_INT_REG) +#define REX_INT_REG_P(X) (REG_P (X) && REX_INT_REGNO_P (REGNO (X))) + +#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X))) +#define FP_REGNO_P(N) ((N) >= FIRST_STACK_REG && (N) <= LAST_STACK_REG) +#define ANY_FP_REG_P(X) (REG_P (X) && ANY_FP_REGNO_P (REGNO (X))) +#define ANY_FP_REGNO_P(N) (FP_REGNO_P (N) || SSE_REGNO_P (N)) + +#define SSE_REGNO_P(N) \ + (((N) >= FIRST_SSE_REG && (N) <= LAST_SSE_REG) \ + || ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG)) + +#define REX_SSE_REGNO_P(N) \ + ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG) + +#define SSE_REGNO(N) \ + ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8) +#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N))) + +#define SSE_FLOAT_MODE_P(MODE) \ + ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) + +#define MMX_REGNO_P(N) ((N) >= FIRST_MMX_REG && (N) <= LAST_MMX_REG) +#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP))) + +#define STACK_REG_P(XOP) \ + (REG_P (XOP) && \ + REGNO (XOP) >= FIRST_STACK_REG && \ + REGNO (XOP) <= LAST_STACK_REG) + +#define NON_STACK_REG_P(XOP) (REG_P (XOP) && ! STACK_REG_P (XOP)) + +#define STACK_TOP_P(XOP) (REG_P (XOP) && REGNO (XOP) == FIRST_STACK_REG) + +#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X))) +#define CC_REGNO_P(X) ((X) == FLAGS_REG || (X) == FPSR_REG) + +/* The class value for index registers, and the one for base regs. 
*/ + +#define INDEX_REG_CLASS INDEX_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Place additional restrictions on the register class to use when it + is necessary to be able to hold a value of mode MODE in a reload + register for which class CLASS would ordinarily be used. */ + +#define LIMIT_RELOAD_CLASS(MODE, CLASS) \ + ((MODE) == QImode && !TARGET_64BIT \ + && ((CLASS) == ALL_REGS || (CLASS) == GENERAL_REGS \ + || (CLASS) == LEGACY_REGS || (CLASS) == INDEX_REGS) \ + ? Q_REGS : (CLASS)) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. + On the 80386 series, we prevent floating constants from being + reloaded into floating registers (since no move-insn can do that) + and we ensure that QImodes aren't reloaded into the esi or edi reg. */ + +/* Put float CONST_DOUBLE in the constant pool instead of fp regs. + QImode must go into class Q_REGS. + Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and + movdf to do mem-to-mem moves through integer regs. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + ix86_preferred_reload_class ((X), (CLASS)) + +/* Discourage putting floating-point values in SSE registers unless + SSE math is being used, and likewise for the 387 registers. */ + +#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \ + ix86_preferred_output_reload_class ((X), (CLASS)) + +/* If we are copying between general and FP registers, we need a memory + location. The same is true for SSE and MMX registers. */ +#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ + ix86_secondary_memory_needed ((CLASS1), (CLASS2), (MODE), 1) + +/* QImode spills from non-QI registers need a scratch. This does not + happen often -- the only example so far requires an uninitialized + pseudo. */ + +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, OUT) \ + (((CLASS) == GENERAL_REGS || (CLASS) == LEGACY_REGS \ + || (CLASS) == INDEX_REGS) && !TARGET_64BIT && (MODE) == QImode \ + ? Q_REGS : NO_REGS) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +/* On the 80386, this is the size of MODE in words, + except in the FP regs, where a single reg is always enough. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (!MAYBE_INTEGER_CLASS_P (CLASS) \ + ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \ + : (((((MODE) == XFmode ? 12 : GET_MODE_SIZE (MODE))) \ + + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* A C expression whose value is nonzero if pseudos that have been + assigned to registers of class CLASS would likely be spilled + because registers of CLASS are needed for spill registers. + + The default value of this macro returns 1 if CLASS has exactly one + register and zero otherwise. On most machines, this default + should be used. Only define this macro to some other expression + if pseudo allocated by `local-alloc.c' end up in memory because + their hard registers were needed for spill registers. If this + macro returns nonzero for those classes, those pseudos will only + be allocated by `global.c', which knows how to reallocate the + pseudo to another register. If there would not be another + register available for reallocation, you should not change the + definition of this macro since the only effect of such a + definition would be to slow down register allocation. 
*/ + +#define CLASS_LIKELY_SPILLED_P(CLASS) \ + (((CLASS) == AREG) \ + || ((CLASS) == DREG) \ + || ((CLASS) == CREG) \ + || ((CLASS) == BREG) \ + || ((CLASS) == AD_REGS) \ + || ((CLASS) == SIREG) \ + || ((CLASS) == DIREG) \ + || ((CLASS) == FP_TOP_REG) \ + || ((CLASS) == FP_SECOND_REG)) + +/* Return a class of registers that cannot change FROM mode to TO mode. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + ix86_cannot_change_mode_class (FROM, TO, CLASS) + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On 386, we have pushw instruction that decrements by exactly 2 no + matter what the position was, there is no pushb. + But as CIE data alignment factor on this arch is -4, we need to make + sure all stack pointer adjustments are in multiple of 4. + + For 64bit ABI we round up to 8 bytes. + */ + +#define PUSH_ROUNDING(BYTES) \ + (TARGET_64BIT \ + ? (((BYTES) + 7) & (-8)) \ + : (((BYTES) + 3) & (-4))) + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `current_function_outgoing_args_size'. No space will be pushed onto the + stack for each call; instead, the function prologue should increase the stack + frame size by this amount. */ + +#define ACCUMULATE_OUTGOING_ARGS TARGET_ACCUMULATE_OUTGOING_ARGS + +/* If defined, a C expression whose value is nonzero when we want to use PUSH + instructions to pass outgoing arguments. */ + +#define PUSH_ARGS (TARGET_PUSH_ARGS && !ACCUMULATE_OUTGOING_ARGS) + +/* We want the stack and args grow in opposite directions, even if + PUSH_ARGS is 0. */ +#define PUSH_ARGS_REVERSED 1 + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Define this macro if functions should assume that stack space has been + allocated for arguments even when their values are passed in registers. + + The value of this macro is the size, in bytes, of the area reserved for + arguments passed in registers for the function represented by FNDECL. + + This space can be allocated by the caller, or be a part of the + machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' says + which. */ +#define REG_PARM_STACK_SPACE(FNDECL) 0 + +/* Value is the number of bytes of arguments automatically + popped when returning from a subroutine call. + FUNDECL is the declaration node of the function (as a tree), + FUNTYPE is the data type of the function (as a tree), + or for a library call it is an identifier node for the subroutine name. + SIZE is the number of bytes of arguments passed on the stack. + + On the 80386, the RTD insn may be used to pop them if the number + of args is fixed, but if the number is variable then the caller + must pop them all. 
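A source-level illustration of the two conventions RETURN_POPS_ARGS arbitrates between, using GCC's i386 attributes; this only changes code generation on 32-bit x86 (elsewhere the attributes are ignored with a warning):

#include <stdio.h>

/* Caller pops the 8 bytes of arguments after the call returns.  */
static int __attribute__((cdecl)) add_cdecl (int a, int b)
{ return a + b; }

/* Callee pops its own arguments with `ret 8' -- the RTD-style
   convention described in the surrounding comment.  */
static int __attribute__((stdcall)) add_stdcall (int a, int b)
{ return a + b; }

int main (void)
{
  printf ("%d %d\n", add_cdecl (1, 2), add_stdcall (3, 4));
  return 0;
}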
RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per module basis. */
+
+#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \
+ ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE))
+
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ix86_function_value_regno_p (N)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) \
+ ix86_libcall_value (MODE)
+
+/* Define the size of the result block used for communication between
+ untyped_call and untyped_return. The block contains a DImode value
+ followed by the block used by fnsave and frstor. */
+
+#define APPLY_RESULT_SIZE (8+108)
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N)
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+
+typedef struct ix86_args {
+ int words; /* # words passed so far */
+ int nregs; /* # registers available for passing */
+ int regno; /* next available register number */
+ int fastcall; /* fastcall calling convention is used */
+ int sse_words; /* # sse words passed so far */
+ int sse_nregs; /* # sse registers available for passing */
+ int warn_sse; /* True when we want to warn about SSE ABI. */
+ int warn_mmx; /* True when we want to warn about MMX ABI. */
+ int sse_regno; /* next available sse register number */
+ int mmx_words; /* # mmx words passed so far */
+ int mmx_nregs; /* # mmx registers available for passing */
+ int mmx_regno; /* next available mmx register number */
+ int maybe_vaarg; /* true for calls to possibly variadic functions. */
+ int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should
+ be passed in SSE registers. Otherwise 0. */
+} CUMULATIVE_ARGS;
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL))
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+ function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED))
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
*/ + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + function_arg (&(CUM), (MODE), (TYPE), (NAMED)) + +/* Implement `va_start' for varargs and stdarg. */ +#define EXPAND_BUILTIN_VA_START(VALIST, NEXTARG) \ + ix86_va_start (VALIST, NEXTARG) + +#define TARGET_ASM_FILE_END ix86_file_end +#define NEED_INDICATE_EXEC_STACK 0 + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) x86_function_profiler (FILE, LABELNO) + +#define MCOUNT_NAME "_mcount" + +#define PROFILE_COUNT_REGISTER "edx" + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +/* Note on the 386 it might be more efficient not to define this since + we have to restore it ourselves from the frame pointer, in order to + use pop */ + +#define EXIT_IGNORE_STACK 1 + +/* Output assembler code for a block containing the constant parts + of a trampoline, leaving space for the variable parts. */ + +/* On the 386, the trampoline contains two instructions: + mov #STATIC,ecx + jmp FUNCTION + The trampoline is generated entirely at runtime. The operand of JMP + is the address of FUNCTION relative to the instruction following the + JMP (which is 5 bytes long). */ + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE (TARGET_64BIT ? 23 : 10) + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ + +#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \ + x86_initialize_trampoline ((TRAMP), (FNADDR), (CXT)) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + There are two registers that can always be eliminated on the i386. + The frame pointer and the arg pointer can be replaced by either the + hard frame pointer or to the stack pointer, depending upon the + circumstances. The hard frame pointer is not used before reload and + so it is not eligible for elimination. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \ + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All other eliminations are valid. */ + +#define CAN_ELIMINATE(FROM, TO) \ + ((TO) == STACK_POINTER_REGNUM ? ! frame_pointer_needed : 1) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = ix86_initial_elimination_offset ((FROM), (TO))) + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. 
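To make the trampoline layout described above concrete: both the `mov $imm32, %ecx' and the `jmp rel32' encodings are 5 bytes, and the jmp operand is relative to the end of the jmp itself. A small sketch of the displacement arithmetic, with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uintptr_t tramp = 0x1000;        /* hypothetical trampoline address */
  uintptr_t function = 0x2000;     /* hypothetical target FUNCTION */
  uintptr_t jmp_insn = tramp + 5;  /* jmp follows the 5-byte mov */
  /* Displacement is measured from the instruction after the jmp.  */
  int32_t rel32 = (int32_t) (function - (jmp_insn + 5));
  printf ("rel32 = %ld\n", (long) rel32);   /* 0x2000 - 0x100a = 4086 */
  return 0;
}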
+ Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + ((REGNO) < STACK_POINTER_REGNUM \ + || (REGNO >= FIRST_REX_INT_REG \ + && (REGNO) <= LAST_REX_INT_REG) \ + || ((unsigned) reg_renumber[(REGNO)] >= FIRST_REX_INT_REG \ + && (unsigned) reg_renumber[(REGNO)] <= LAST_REX_INT_REG) \ + || (unsigned) reg_renumber[(REGNO)] < STACK_POINTER_REGNUM) + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) <= STACK_POINTER_REGNUM \ + || (REGNO) == ARG_POINTER_REGNUM \ + || (REGNO) == FRAME_POINTER_REGNUM \ + || (REGNO >= FIRST_REX_INT_REG \ + && (REGNO) <= LAST_REX_INT_REG) \ + || ((unsigned) reg_renumber[(REGNO)] >= FIRST_REX_INT_REG \ + && (unsigned) reg_renumber[(REGNO)] <= LAST_REX_INT_REG) \ + || (unsigned) reg_renumber[(REGNO)] <= STACK_POINTER_REGNUM) + +#define REGNO_OK_FOR_SIREG_P(REGNO) \ + ((REGNO) == 4 || reg_renumber[(REGNO)] == 4) +#define REGNO_OK_FOR_DIREG_P(REGNO) \ + ((REGNO) == 5 || reg_renumber[(REGNO)] == 5) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + + +/* Non strict versions, pseudos are ok. */ +#define REG_OK_FOR_INDEX_NONSTRICT_P(X) \ + (REGNO (X) < STACK_POINTER_REGNUM \ + || (REGNO (X) >= FIRST_REX_INT_REG \ + && REGNO (X) <= LAST_REX_INT_REG) \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +#define REG_OK_FOR_BASE_NONSTRICT_P(X) \ + (REGNO (X) <= STACK_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || (REGNO (X) >= FIRST_REX_INT_REG \ + && REGNO (X) <= LAST_REX_INT_REG) \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* Strict versions, hard registers only */ +#define REG_OK_FOR_INDEX_STRICT_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#ifndef REG_OK_STRICT +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_NONSTRICT_P (X) +#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_NONSTRICT_P (X) + +#else +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_STRICT_P (X) +#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_STRICT_P (X) +#endif + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. + + The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS, + except for CONSTANT_ADDRESS_P which is usually machine-independent. + + See legitimize_pic_address in i386.c for details as to what + constitutes a legitimate address when -fpic is used. */ + +#define MAX_REGS_PER_ADDRESS 2 + +#define CONSTANT_ADDRESS_P(X) constant_address_p (X) + +/* Nonzero if the constant value X is a legitimate general operand. + It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. 
*/ + +#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X) + +#ifdef REG_OK_STRICT +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ +do { \ + if (legitimate_address_p ((MODE), (X), 1)) \ + goto ADDR; \ +} while (0) + +#else +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ +do { \ + if (legitimate_address_p ((MODE), (X), 0)) \ + goto ADDR; \ +} while (0) + +#endif + +/* If defined, a C expression to determine the base term of address X. + This macro is used in only one place: `find_base_term' in alias.c. + + It is always safe for this macro to not be defined. It exists so + that alias analysis can understand machine-dependent addresses. + + The typical use of this macro is to handle addresses containing + a label_ref or symbol_ref within an UNSPEC. */ + +#define FIND_BASE_TERM(X) ix86_find_base_term (X) + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + MODE and WIN are passed so that this macro can use + GO_IF_LEGITIMATE_ADDRESS. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. + + For the 80386, we handle X+REG by loading X into a register R and + using R+REG. R will go in a general reg and indexing will be used. + However, if REG is a broken-out memory address or multiplication, + nothing needs to be done because REG can certainly go in a general reg. + + When -fpic is used, special handling is needed for symbolic references. + See comments by legitimize_pic_address in i386.c for details. */ + +#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) \ +do { \ + (X) = legitimize_address ((X), (OLDX), (MODE)); \ + if (memory_address_p ((MODE), (X))) \ + goto WIN; \ +} while (0) + +#define REWRITE_ADDRESS(X) rewrite_address (X) + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ + +#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X) + +#define SYMBOLIC_CONST(X) \ + (GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + +/* Go to LABEL if ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. + On the 80386, only postdecrement and postincrement address depend thus + (the amount of decrement or increment being the length of the operand). */ +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ +do { \ + if (GET_CODE (ADDR) == POST_INC \ + || GET_CODE (ADDR) == POST_DEC) \ + goto LABEL; \ +} while (0) + +/* Max number of args passed in registers. If this is more than 3, we will + have problems with ebx (register #4), since it is a caller save register and + is also used as the pic register in ELF. So for now, don't allow more than + 3 registers to be passed in registers. */ + +#define REGPARM_MAX (TARGET_64BIT ? 6 : 3) + +/* APPLE LOCAL regparmandstackparm */ +#define SSE_REGPARM_MAX (TARGET_64BIT ? 8 : (TARGET_MACHO ? 4 : (TARGET_SSE ? 3 : 0))) + +#define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0)) + + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. 
*/ +#define CASE_VECTOR_MODE (!TARGET_64BIT || flag_pic ? SImode : DImode) + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Number of bytes moved into a data cache for a single prefetch operation. */ +#define PREFETCH_BLOCK ix86_cost->prefetch_block + +/* Number of prefetch operations that can be done in parallel. */ +#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 16 + +/* MOVE_MAX_PIECES is the number of bytes at a time which we can + move efficiently, as opposed to MOVE_MAX which is the maximum + number of bytes we can move with a single instruction. */ +#define MOVE_MAX_PIECES (TARGET_64BIT ? 8 : 4) + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. + Increasing the value will always make code faster, but eventually + incurs high cost in increased code size. + + If you don't define this, a reasonable default is used. */ + +#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio) + +/* If a clear memory operation would take CLEAR_RATIO or more simple + move-instruction sequences, we will do a clrmem or libcall instead. */ + +#define CLEAR_RATIO (optimize_size ? 2 \ + : ix86_cost->move_ratio > 6 ? 6 : ix86_cost->move_ratio) + +/* Define if shifts truncate the shift count + which implies one can omit a sign-extension or zero-extension + of a shift count. */ +/* On i386, shifts do truncate the count. But bit opcodes don't. */ + +/* #define SHIFT_COUNT_TRUNCATED */ + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* A macro to update M and UNSIGNEDP when an object whose type is + TYPE and which has the specified mode and signedness is to be + stored in a register. This macro is only called when TYPE is a + scalar type. + + On i386 it is sometimes useful to promote HImode and QImode + quantities to SImode. The choice depends on target type. */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ +do { \ + if (((MODE) == HImode && TARGET_PROMOTE_HI_REGS) \ + || ((MODE) == QImode && TARGET_PROMOTE_QI_REGS)) \ + (MODE) = SImode; \ +} while (0) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode (TARGET_64BIT ? DImode : SImode) + +/* A function address in a call instruction + is a byte address (for indexing purposes) + so give the MEM rtx a byte's mode. */ +#define FUNCTION_MODE QImode + +/* A C expression for the cost of moving data from a register in class FROM to + one in class TO. The classes are expressed using the enumeration values + such as `GENERAL_REGS'. A value of 2 is the default; other values are + interpreted relative to that. + + It is not required that the cost always equal 2 when FROM is the same as TO; + on some machines it is expensive to move between registers if they are not + general registers. */ + +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ + ix86_register_move_cost ((MODE), (CLASS1), (CLASS2)) + +/* A C expression for the cost of moving data of mode M between a + register and memory. A value of 2 is the default; this cost is + relative to those in `REGISTER_MOVE_COST'. 
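A sketch of the inline-vs-libcall decision that MOVE_RATIO above feeds: a memory-to-memory copy is expanded as individual move pairs only while the word count stays under the ratio. The cutoff of 3 under -Os comes from the macro; the per-CPU value 17 here is a made-up stand-in for ix86_cost->move_ratio:

#include <stdio.h>

static int inline_copy_p (unsigned words, int optimize_size, int cpu_move_ratio)
{
  int move_ratio = optimize_size ? 3 : cpu_move_ratio;
  return words < (unsigned) move_ratio;  /* ratio or more => movmem/libcall */
}

int main (void)
{
  printf ("%d %d\n",
          inline_copy_p (2, 1, 17),   /* 1: small copy stays inline */
          inline_copy_p (8, 1, 17));  /* 0: under -Os, punt to a libcall */
  return 0;
}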
+
+   If moving between registers and memory is more expensive than
+   between two registers, you should define this macro to express the
+   relative cost.  */
+
+#define MEMORY_MOVE_COST(MODE, CLASS, IN) \
+  ix86_memory_move_cost ((MODE), (CLASS), (IN))
+
+/* A C expression for the cost of a branch instruction.  A value of 1
+   is the default; other values are interpreted relative to that.  */
+
+#define BRANCH_COST ix86_branch_cost
+
+/* Define this macro as a C expression which is nonzero if accessing
+   less than a word of memory (i.e. a `char' or a `short') is no
+   faster than accessing a word of memory, i.e., if such accesses
+   require more than one instruction or if there is no difference in
+   cost between byte and (aligned) word loads.
+
+   When this macro is not defined, the compiler will access a field by
+   finding the smallest containing object; when it is defined, a
+   fullword load will be used if alignment permits.  Unless byte
+   accesses are faster than word accesses, using word accesses is
+   preferable since it may eliminate subsequent memory accesses if
+   subsequent accesses occur to other fields in the same word of the
+   structure, but to different bytes.  */
+
+/* APPLE LOCAL 6131435 */
+#define SLOW_BYTE_ACCESS (!flag_apple_kext && !flag_mkernel && !TARGET_64BIT)
+
+/* Nonzero if access to memory by shorts is slow and undesirable.  */
+#define SLOW_SHORT_ACCESS 0
+
+/* Define this macro to be the value 1 if unaligned accesses have a
+   cost many times greater than aligned accesses, for example if they
+   are emulated in a trap handler.
+
+   When this macro is nonzero, the compiler will act as if
+   `STRICT_ALIGNMENT' were nonzero when generating code for block
+   moves.  This can cause significantly more instructions to be
+   produced.  Therefore, do not set this macro nonzero if unaligned
+   accesses only add a cycle or two to the time for a memory access.
+
+   If the value of this macro is always zero, it need not be defined.  */
+
+/* #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 0 */
+
+/* Define this macro if it is as good or better to call a constant
+   function address than to call an address kept in a register.
+
+   Desirable on the 386 because a CALL with a constant address is
+   faster than one with a register address.  */
+
+#define NO_FUNCTION_CSE
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+   return the mode to be used for the comparison.
+
+   For floating-point equality comparisons, CCFPEQmode should be used.
+   VOIDmode should be used in all other cases.
+
+   For integer comparisons against zero, reduce to CCNOmode or CCZmode if
+   possible, to allow for more combinations.  */
+
+#define SELECT_CC_MODE(OP, X, Y) ix86_cc_mode ((OP), (X), (Y))
+
+/* Return nonzero if MODE implies a floating point inequality can be
+   reversed.  */
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* A C expression whose value is the reversed condition code of CODE,
+   for a comparison done in mode CC_MODE.  */
+#define REVERSE_CONDITION(CODE, MODE) ix86_reverse_condition ((CODE), (MODE))
+
+
+/* Control the assembler format that we output, to the extent
+   this does not vary between assemblers.  */
+
+/* How to refer to registers in assembler output.
+   This sequence is indexed by compiler's hard-register-number (see above).  */
+
+/* In order to refer to the first 8 regs as 32 bit regs, prefix an "e".
+   For non floating point regs, the following are the HImode names.
+
+   For float regs, the stack top is sometimes referred to as "%st(0)"
+   instead of just "%st".
PRINT_OPERAND handles this with the "y" code. */ + +#define HI_REGISTER_NAMES \ +{"ax","dx","cx","bx","si","di","bp","sp", \ + "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)", \ + "argp", "flags", "fpsr", "dirflag", "frame", \ + "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \ + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" , \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"} + +#define REGISTER_NAMES HI_REGISTER_NAMES + +/* Table of additional register names to use in user input. */ + +#define ADDITIONAL_REGISTER_NAMES \ +{ { "eax", 0 }, { "edx", 1 }, { "ecx", 2 }, { "ebx", 3 }, \ + { "esi", 4 }, { "edi", 5 }, { "ebp", 6 }, { "esp", 7 }, \ + { "rax", 0 }, { "rdx", 1 }, { "rcx", 2 }, { "rbx", 3 }, \ + { "rsi", 4 }, { "rdi", 5 }, { "rbp", 6 }, { "rsp", 7 }, \ + { "al", 0 }, { "dl", 1 }, { "cl", 2 }, { "bl", 3 }, \ + { "ah", 0 }, { "dh", 1 }, { "ch", 2 }, { "bh", 3 } } + +/* Note we are omitting these since currently I don't know how +to get gcc to use these, since they want the same but different +number as al, and ax. +*/ + +#define QI_REGISTER_NAMES \ +{"al", "dl", "cl", "bl", "sil", "dil", "bpl", "spl",} + +/* These parallel the array above, and can be used to access bits 8:15 + of regs 0 through 3. */ + +#define QI_HIGH_REGISTER_NAMES \ +{"ah", "dh", "ch", "bh", } + +/* How to renumber registers for dbx and gdb. */ + +#define DBX_REGISTER_NUMBER(N) \ + (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)]) + +extern int const dbx_register_map[FIRST_PSEUDO_REGISTER]; +extern int const dbx64_register_map[FIRST_PSEUDO_REGISTER]; +extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER]; + +/* Before the prologue, RA is at 0(%esp). */ +#define INCOMING_RETURN_ADDR_RTX \ + gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM)) + +/* After the prologue, RA is at -4(AP) in the current frame. */ +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + ((COUNT) == 0 \ + ? gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, -UNITS_PER_WORD)) \ + : gen_rtx_MEM (Pmode, plus_constant (FRAME, UNITS_PER_WORD))) + +/* PC is dbx register 8; let's use that column for RA. */ +#define DWARF_FRAME_RETURN_COLUMN (TARGET_64BIT ? 16 : 8) + +/* Before the prologue, the top of the frame is at 4(%esp). */ +#define INCOMING_FRAME_SP_OFFSET UNITS_PER_WORD + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 2) + + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + ??? All x86 object file formats are capable of representing this. + After all, the relocation needed is the same as for the call insn. + Whether or not a particular assembler allows us to enter such, I + guess we'll have to see. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + asm_preferred_eh_data_format ((CODE), (GLOBAL)) + +/* This is how to output an insn to push a register on the stack. + It need not be very fast code. 
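+
+   For example, for REGNO 0 (ax) this emits "pushl %eax" on 32-bit
+   targets and "pushq %rax" on 64-bit ones; for the REX registers the
+   leading "r" of the name in reg_names is skipped, so that "%r"
+   followed by "8" prints as "%r8".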
*/ + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \ +do { \ + if (TARGET_64BIT) \ + asm_fprintf ((FILE), "\tpush{q}\t%%r%s\n", \ + reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \ + else \ + asm_fprintf ((FILE), "\tpush{l}\t%%e%s\n", reg_names[(REGNO)]); \ +} while (0) + +/* This is how to output an insn to pop a register from the stack. + It need not be very fast code. */ + +#define ASM_OUTPUT_REG_POP(FILE, REGNO) \ +do { \ + if (TARGET_64BIT) \ + asm_fprintf ((FILE), "\tpop{q}\t%%r%s\n", \ + reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \ + else \ + asm_fprintf ((FILE), "\tpop{l}\t%%e%s\n", reg_names[(REGNO)]); \ +} while (0) + +/* This is how to output an element of a case-vector that is absolute. */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + ix86_output_addr_vec_elt ((FILE), (VALUE)) + +/* This is how to output an element of a case-vector that is relative. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + ix86_output_addr_diff_elt ((FILE), (VALUE), (REL)) + +/* Under some conditions we need jump tables in the text section, + because the assembler cannot handle label differences between + sections. This is the case for x86_64 on Mach-O for example. */ + +#define JUMP_TABLES_IN_TEXT_SECTION \ + (flag_pic && ((TARGET_MACHO && TARGET_64BIT) \ + || (!TARGET_64BIT && !HAVE_AS_GOTOFF_IN_DATA))) + +/* Switch to init or fini section via SECTION_OP, emit a call to FUNC, + and switch back. For x86 we do this only to save a few bytes that + would otherwise be unused in the text section. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t" \ + "call " USER_LABEL_PREFIX #FUNC "\n" \ + TEXT_SECTION_ASM_OP); + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + Effect of various CODE letters is described in i386.c near + print_operand function. */ + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \ + ((CODE) == '*' || (CODE) == '+' || (CODE) == '&') + +#define PRINT_OPERAND(FILE, X, CODE) \ + print_operand ((FILE), (X), (CODE)) + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ + print_operand_address ((FILE), (ADDR)) + +#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \ +do { \ + if (! output_addr_const_extra (FILE, (X))) \ + goto FAIL; \ +} while (0); + +/* a letter which is not needed by the normal asm syntax, which + we can use for operand syntax in the extended asm */ + +#define ASM_OPERAND_LETTER '#' +#define RET return "" +#define AT_SP(MODE) (gen_rtx_MEM ((MODE), stack_pointer_rtx)) + +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to i386.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_I386, /* 80386 */ + PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ + PROCESSOR_PENTIUM, + PROCESSOR_PENTIUMPRO, + PROCESSOR_K6, + PROCESSOR_ATHLON, + PROCESSOR_PENTIUM4, + PROCESSOR_K8, + PROCESSOR_NOCONA, + /* APPLE LOCAL mainline */ + PROCESSOR_CORE2, + PROCESSOR_GENERIC32, + PROCESSOR_GENERIC64, + PROCESSOR_max +}; + +extern enum processor_type ix86_tune; +extern enum processor_type ix86_arch; + +enum fpmath_unit +{ + FPMATH_387 = 1, + FPMATH_SSE = 2 +}; + +extern enum fpmath_unit ix86_fpmath; + +enum tls_dialect +{ + TLS_DIALECT_GNU, + TLS_DIALECT_GNU2, + TLS_DIALECT_SUN +}; + +extern enum tls_dialect ix86_tls_dialect; + +enum cmodel { + CM_32, /* The traditional 32-bit ABI. */ + CM_SMALL, /* Assumes all code and data fits in the low 31 bits. 
*/
+  CM_KERNEL,    /* Assumes all code and data fits in the high 31 bits.  */
+  CM_MEDIUM,    /* Assumes code fits in the low 31 bits; data unlimited.  */
+  CM_LARGE,     /* No assumptions.  */
+  CM_SMALL_PIC, /* Assumes code+data+got/plt fits in a 31 bit region.  */
+  CM_MEDIUM_PIC /* Assumes code+got/plt fits in a 31 bit region.  */
+};
+
+extern enum cmodel ix86_cmodel;
+
+/* Size of the RED_ZONE area.  */
+#define RED_ZONE_SIZE 128
+/* Reserved area of the red zone for temporaries.  */
+#define RED_ZONE_RESERVE 8
+
+enum asm_dialect {
+  ASM_ATT,
+  ASM_INTEL
+};
+
+extern enum asm_dialect ix86_asm_dialect;
+/* APPLE LOCAL begin regparmandstackparm */
+extern void ix86_darwin_handle_regparmandstackparm (tree fndecl);
+extern void ix86_darwin_redirect_calls(void);
+/* APPLE LOCAL end regparmandstackparm */
+
+extern unsigned int ix86_preferred_stack_boundary;
+/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
+extern unsigned int ix86_save_preferred_stack_boundary;
+/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
+extern int ix86_branch_cost, ix86_section_threshold;
+
+/* Smallest class containing REGNO.  */
+extern enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER];
+
+extern rtx ix86_compare_op0;	/* operand 0 for comparisons */
+extern rtx ix86_compare_op1;	/* operand 1 for comparisons */
+extern rtx ix86_compare_emitted;
+
+/* To properly truncate FP values into integers, we need to set the i387
+   control word.  We can't emit proper mode-switching code before reload,
+   as spills generated by reload may truncate values incorrectly, but we
+   can still avoid redundant computation of the new control word by the
+   mode-switching pass.  The fldcw instructions are still emitted
+   redundantly, but this is probably not going to be a noticeable problem,
+   as most CPUs do have a fast path for the sequence.
+
+   The machinery is to emit simple truncation instructions and split them
+   before reload into instructions having USEs of two memory locations,
+   which are filled in by this code with the old and new control word.
+
+   A post-reload pass may later be used to eliminate the redundant fldcw
+   if needed.  */
+
+enum ix86_entity
+{
+  I387_TRUNC = 0,
+  I387_FLOOR,
+  I387_CEIL,
+  I387_MASK_PM,
+  MAX_386_ENTITIES
+};
+
+enum ix86_stack_slot
+{
+  SLOT_VIRTUAL = 0,
+  SLOT_TEMP,
+  SLOT_CW_STORED,
+  SLOT_CW_TRUNC,
+  SLOT_CW_FLOOR,
+  SLOT_CW_CEIL,
+  SLOT_CW_MASK_PM,
+  MAX_386_STACK_LOCALS
+};
+
+/* Define this macro if the port needs extra instructions inserted
+   for mode switching in an optimizing compilation.  */
+
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
+   ix86_optimize_mode_switching[(ENTITY)]
+
+/* If you define `OPTIMIZE_MODE_SWITCHING', you have to define this as
+   an initializer for an array of integers.  Each initializer element N
+   refers to an entity that needs mode switching, and specifies the
+   number of different modes that might need to be set for this
+   entity.  The position of the initializer in the initializer -
+   starting counting at zero - determines the integer that is used to
+   refer to the mode-switched entity in question.  */
+
+#define NUM_MODES_FOR_MODE_SWITCHING \
+   { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+
+/* ENTITY is an integer specifying a mode-switched entity.  If
+   `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
+   return an integer value not larger than the corresponding element
+   in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
+   must be switched into prior to the execution of INSN.
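+
+   For example, an embedded fix_trunc operation needs the I387_TRUNC
+   entity switched to the truncating control word, so ix86_mode_needed
+   returns the corresponding mode (I387_CW_TRUNC) for it, and the
+   mode-switching pass can then emit a single fldcw ahead of a whole
+   run of such instructions.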
*/
+
+#define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
+
+/* This macro specifies the order in which modes for ENTITY are
+   processed.  0 is the highest priority.  */
+
+#define MODE_PRIORITY_TO_MODE(ENTITY, N) (N)
+
+/* Generate one or more insns to set ENTITY to MODE.  HARD_REG_LIVE
+   is the set of hard registers live at the point where the insn(s)
+   are to be inserted.  */
+
+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+  ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
+   ? emit_i387_cw_initialization (MODE), 0 \
+   : 0)
+
+
+/* Avoid renaming of stack registers: doing so in combination with
+   scheduling just increases the number of registers live at a time,
+   and in turn the number of fxch instructions needed.
+
+   ??? Maybe Pentium chips benefit from renaming; someone can try....  */
+
+#define HARD_REGNO_RENAME_OK(SRC, TARGET)  \
+   ((SRC) < FIRST_STACK_REG || (SRC) > LAST_STACK_REG)
+
+
+#define DLL_IMPORT_EXPORT_PREFIX '#'
+
+#define FASTCALL_PREFIX '@'
+
+struct machine_function GTY(())
+{
+  struct stack_local_entry *stack_locals;
+  const char *some_ld_name;
+  rtx force_align_arg_pointer;
+  int save_varrargs_registers;
+  int accesses_prev_frame;
+  int optimize_mode_switching[MAX_386_ENTITIES];
+  /* Set by ix86_compute_frame_layout and used by the prologue/epilogue
+     expander to determine the style used.  */
+  int use_fast_prologue_epilogue;
+  /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
+     for.  */
+  int use_fast_prologue_epilogue_nregs;
+  /* If true, the current function needs the default PIC register, not
+     an alternate register (on x86) and must not use the red zone (on
+     x86_64), even if it's a leaf function.  We don't want the
+     function to be regarded as non-leaf because TLS calls need not
+     affect register allocation.  This flag is set when a TLS call
+     instruction is expanded within a function, and never reset, even
+     if all such instructions are optimized away.  Use the
+     ix86_current_function_calls_tls_descriptor macro for a better
+     approximation.  */
+  int tls_descriptor_call_expanded_p;
+};
+
+#define ix86_stack_locals (cfun->machine->stack_locals)
+#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
+#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_tls_descriptor_calls_expanded_in_cfun \
+  (cfun->machine->tls_descriptor_call_expanded_p)
+/* Since tls_descriptor_call_expanded is not cleared, even if all TLS
+   calls are optimized away, we try to detect cases in which it was
+   optimized away.  Since such calls emit a (use (reg REG_SP)), we can
+   verify whether any such instruction remains live by testing that
+   REG_SP is live.  */
+#define ix86_current_function_calls_tls_descriptor \
+  (ix86_tls_descriptor_calls_expanded_in_cfun && regs_ever_live[SP_REG])
+
+/* Control behavior of x86_file_start.  */
+#define X86_FILE_START_VERSION_DIRECTIVE false
+#define X86_FILE_START_FLTUSED false
+
+/* APPLE LOCAL begin CW asm blocks */
+#undef TARGET_IASM_EXTRA_INFO
+#define TARGET_IASM_EXTRA_INFO \
+  char mod[3]; \
+  bool as_immediate; \
+  bool as_offset; \
+  bool pseudo;
+
+#define TARGET_IASM_REORDER_ARG(OPCODE, NEWARGNUM, NUM_ARGS, ARGNUM) \
+  do { \
+    /* If we are outputting AT&T style assembly language, the argument \
+       numbering is reversed. \
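+       (For example, Intel-syntax "mov eax, ebx" corresponds to AT&T \
+       "movl %ebx, %eax", so argument 1 becomes argument NUM_ARGS, \
+       argument 2 becomes NUM_ARGS - 1, and so on.) \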
+       */ \
+    if (iasm_x86_needs_swapping (opcode)) \
+      NEWARGNUM = NUM_ARGS - ARGNUM + 1; \
+  } while (0)
+
+#define IASM_SYNTH_CONSTRAINTS(R, ARGNUM, NUM_ARGS, DB) \
+  do { \
+    /* On x86, operand 2 or 3 can be left out and the assembler will deal with it. \
+ \
+       Take for example an opcode: \
+ \
+         opcode r m i \
+ \
+       We allow: \
+ \
+         opcode r mi \
+ \
+       when we have only 2 operands.  */ \
+    if (R \
+	&& ARGNUM == 2 \
+	&& NUM_ARGS == 2 \
+	&& R < &DB[sizeof(DB) / sizeof (DB[0]) - 1] \
+	&& strcmp (R[1].opcode, R->opcode) == 0 \
+	&& R[1].argnum == 3) \
+      { \
+	tree t; \
+	size_t len = strlen (r->constraint) + strlen (r[1].constraint) + 1; \
+	char *p = alloca (len); \
+ \
+	sprintf(p, "%s%s", r->constraint, r[1].constraint); \
+	t = build_string (len, p); \
+	return TREE_STRING_POINTER (t); \
+      } \
+  } while (0)
+
+#define TARGET_IASM_PRINT_OP(BUF, ARG, ARGNUM, USES, MUST_BE_REG, MUST_NOT_BE_REG, E) \
+  iasm_print_op (BUF, ARG, ARGNUM, USES, MUST_BE_REG, MUST_NOT_BE_REG, E)
+
+extern tree iasm_x86_canonicalize_operands (const char **, tree, void *);
+/* On x86, we can rewrite opcodes, change argument ordering and so on...  */
+#define IASM_CANONICALIZE_OPERANDS(OPCODE, NEW_OPCODE, IARGS, E) \
+  do { \
+    NEW_OPCODE = OPCODE; \
+    IARGS = iasm_x86_canonicalize_operands (&NEW_OPCODE, IARGS, E); \
+  } while (0)
+
+#define IASM_SEE_OPCODE(YYCHAR, T) \
+   /* If we see an int, arrange to see it as an identifier (opcode), \
+      not as a type.  */ \
+  ((YYCHAR == TYPESPEC \
+    && C_RID_CODE (T) == RID_INT) \
+   ? IDENTIFIER : YYCHAR)
+
+/* Return true iff the ID is a prefix for an instruction.  */
+
+#define IASM_IS_PREFIX(ID) \
+  do { \
+    const char *myname = IDENTIFIER_POINTER (ID); \
+    if (strcasecmp (myname, "lock") == 0 \
+	|| strcasecmp (myname, "rep") == 0 \
+	|| strcasecmp (myname, "repe") == 0 \
+	|| strcasecmp (myname, "repz") == 0 \
+	|| strcasecmp (myname, "repne") == 0 \
+	|| strcasecmp (myname, "repnz") == 0) \
+      return true; \
+  } while (0)
+
+#define IASM_PRINT_PREFIX(BUF, PREFIX_LIST) iasm_x86_print_prefix(BUF, PREFIX_LIST)
+
+#define IASM_IMMED_PREFIX(E, BUF) \
+  do { \
+    if (!E->pseudo && ! E->as_immediate) \
+      sprintf (BUF + strlen (BUF), "$"); \
+  } while (0)
+
+#define IASM_OFFSET_PREFIX(E, BUF) \
+  do { \
+    if (E->as_offset) \
+      sprintf (BUF + strlen (BUF), "$"); \
+  } while (0)
+
+/* We can't yet expose ST(x) to reg-stack.c, don't try.  */
+#define IASM_HIDE_REG(R) FP_REGNO_P (R)
+
+#define IASM_SEE_IMMEDIATE(E) \
+  E->as_immediate = true
+
+#define IASM_SEE_NO_IMMEDIATE(E) \
+  E->as_immediate = false
+
+/* Table of instructions that need extra constraints.  Keep this table sorted.
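+
+   Each entry is { opcode, operand number, constraint string }, using
+   the usual GCC constraint letters ("r" register, "m" memory, "i"
+   immediate, "x" SSE register, "y" MMX register; "=" marks an output
+   and "+" an in-out operand).  For example, { "bsf", 2, "rm" } says
+   that the second operand of bsf may be a register or memory.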
*/ +#undef TARGET_IASM_OP_CONSTRAINT +#define TARGET_IASM_OP_CONSTRAINT \ + { "adc", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64 },\ + { "adc", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64 },\ + { "add", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64 },\ + { "add", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\ + { "addpd", 1, "+x"}, \ + { "addpd", 2, "xm"}, \ + { "addps", 1, "+x"}, \ + { "addps", 2, "xm"}, \ + { "addsd", 1, "+x"}, \ + { "addsd", 2, "xm"}, \ + { "addss", 1, "+x"}, \ + { "addss", 2, "xm"}, \ + { "addsubpd", 1, "+x"}, \ + { "addsubpd", 2, "xm"}, \ + { "addsubps", 1, "+x"}, \ + { "addsubps", 2, "xm"}, \ + { "and", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\ + { "and", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\ + { "andnpd", 1, "+x"}, \ + { "andnpd", 2, "xm"}, \ + { "andnps", 1, "+x"}, \ + { "andnps", 2, "xm"}, \ + { "andpd", 1, "+x"}, \ + { "andpd", 2, "xm"}, \ + { "andps", 1, "+x"}, \ + { "andps", 2, "xm"}, \ + { NX "arpl", 1, "+" rm16}, \ + { NX "arpl", 2, r16}, \ + { "bound", 1, U("r")}, \ + { "bound", 2, U("m")}, \ + { "bsf", 1, "=r"}, \ + { "bsf", 2, "rm"}, \ + { "bsr", 1, "=r"}, \ + { "bsr", 2, "rm"}, \ + { "bt", 1, "rm"}, \ + { "bt", 2, "ri"}, \ + { "btc", 1, "rm"}, \ + { "btc", 2, "ri"}, \ + { "btr", 1, "rm"}, \ + { "btr", 2, "ri"}, \ + { "bts", 1, "rm"}, \ + { "bts", 2, "ri"}, \ + { NX "call", 1, "rsm"}, \ + { "clflush", 1, "=m"}, \ + { "cmova", 1, r16 "," r32 C R64},\ + { "cmova", 2, rm16 "," rm32 C RM64},\ + { "cmovae", 2, "rm"}, \ + { "cmovb", 2, "rm"}, \ + { "cmovbe", 2, "rm"}, \ + { "cmovc", 2, "rm"}, \ + { "cmove", 2, "rm"}, \ + { "cmovg", 2, "rm"}, \ + { "cmovge", 2, "rm"}, \ + { "cmovl", 2, "rm"}, \ + { "cmovle", 2, "rm"}, \ + { "cmovna", 2, "rm"}, \ + { "cmovnae", 2, "rm"}, \ + { "cmovnb", 2, "rm"}, \ + { "cmovnbe", 2, "rm"}, \ + { "cmovnc", 2, "rm"}, \ + { "cmovne", 2, "rm"}, \ + { "cmovng", 2, "rm"}, \ + { "cmovnge", 2, "rm"}, \ + { "cmovnl", 2, "rm"}, \ + { "cmovnle", 2, "rm"}, \ + { "cmovno", 2, "rm"}, \ + { "cmovnp", 2, "rm"}, \ + { "cmovns", 2, "rm"}, \ + { "cmovnz", 2, "rm"}, \ + { "cmovo", 2, "rm"}, \ + { "cmovp", 2, "rm"}, \ + { "cmovpe", 2, "rm"}, \ + { "cmovpo", 2, "rm"}, \ + { "cmovs", 2, "rm"}, \ + { "cmovz", 2, "rm"}, \ + { "cmp", 1, rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\ + { "cmp", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\ + { "cmpeqpd", 1, "=x"}, \ + { "cmpeqpd", 2, "xm"}, \ + { "cmpeqps", 1, "=x"}, \ + { "cmpeqps", 2, "xm"}, \ + { "cmpeqsd", 1, "=x"}, \ + { "cmpeqsd", 2, "xm"}, \ + { "cmpeqss", 1, "=x"}, \ + { "cmpeqss", 2, "xm"}, \ + { "cmplepd", 1, "=x"}, \ + { "cmplepd", 2, "xm"}, \ + { "cmpleps", 1, "=x"}, \ + { "cmpleps", 2, "xm"}, \ + { "cmplesd", 1, "=x"}, \ + { "cmplesd", 2, "xm"}, \ + { "cmpless", 1, "=x"}, \ + { "cmpless", 2, "xm"}, \ + { "cmpltpd", 1, "=x"}, \ + { "cmpltpd", 2, "xm"}, \ + { "cmpltps", 1, "=x"}, \ + { "cmpltps", 2, "xm"}, \ + { "cmpltsd", 1, "=x"}, \ + { "cmpltsd", 2, "xm"}, \ + { "cmpltss", 1, "=x"}, \ + { "cmpltss", 2, "xm"}, \ + { "cmpneqpd", 1, "=x"}, \ + { "cmpneqpd", 2, "xm"}, \ + { "cmpneqps", 1, "=x"}, \ + { "cmpneqps", 2, "xm"}, \ + { "cmpneqsd", 1, "=x"}, \ + { "cmpneqsd", 2, "xm"}, \ + { "cmpneqss", 1, "=x"}, \ + { "cmpneqss", 2, "xm"}, \ + { "cmpnlepd", 1, "=x"}, \ + { "cmpnlepd", 2, "xm"}, \ + { "cmpnleps", 1, "=x"}, \ + { "cmpnleps", 2, "xm"}, \ + { "cmpnlesd", 1, "=x"}, \ + { "cmpnlesd", 2, "xm"}, \ + { "cmpnless", 1, "=x"}, \ + { "cmpnless", 
2, "xm"}, \ + { "cmpnltpd", 1, "=x"}, \ + { "cmpnltpd", 2, "xm"}, \ + { "cmpnltps", 1, "=x"}, \ + { "cmpnltps", 2, "xm"}, \ + { "cmpnltsd", 1, "=x"}, \ + { "cmpnltsd", 2, "xm"}, \ + { "cmpnltss", 1, "=x"}, \ + { "cmpnltss", 2, "xm"}, \ + { "cmpordpd", 1, "=x"}, \ + { "cmpordpd", 2, "xm"}, \ + { "cmpordps", 1, "=x"}, \ + { "cmpordps", 2, "xm"}, \ + { "cmpordsd", 1, "=x"}, \ + { "cmpordsd", 2, "xm"}, \ + { "cmpordss", 1, "=x"}, \ + { "cmpordss", 2, "xm"}, \ + { "cmppd", 1, "=x"}, \ + { "cmppd", 2, "xm"}, \ + { "cmppd", 3, "i"}, \ + { "cmpps", 1, "=x"}, \ + { "cmpps", 2, "xm"}, \ + { "cmpps", 3, "i"}, \ + { "cmpsd", 1, "=x"}, \ + { "cmpsd", 2, "xm"}, \ + { "cmpsd", 3, "i"}, \ + { "cmpss", 1, "=x"}, \ + { "cmpss", 2, "xm"}, \ + { "cmpss", 3, "i"}, \ + { "cmpunordpd", 1, "=x"}, \ + { "cmpunordpd", 2, "xm"}, \ + { "cmpunordps", 1, "=x"}, \ + { "cmpunordps", 2, "xm"}, \ + { "cmpunordsd", 1, "=x"}, \ + { "cmpunordsd", 2, "xm"}, \ + { "cmpunordss", 1, "=x"}, \ + { "cmpunordss", 2, "xm"}, \ + { "cmpxchg", 1, "+mr"}, \ + { "cmpxchg", 2, "r"}, \ + { "comisd", 1, "x"}, \ + { "comisd", 2, "xm"}, \ + { "comiss", 1, "x"}, \ + { "comiss", 2, "xm"}, \ + { "cvtdq2pd", 1, "=x"}, \ + { "cvtdq2pd", 2, "xm"}, \ + { "cvtdq2ps", 1, "=x"}, \ + { "cvtdq2ps", 2, "xm"}, \ + { "cvtpd2dq", 1, "=x"}, \ + { "cvtpd2dq", 2, "xm"}, \ + { "cvtpd2pi", 1, "=y"}, \ + { "cvtpd2pi", 2, "xm"}, \ + { "cvtpd2ps", 1, "=x"}, \ + { "cvtpd2ps", 2, "xm"}, \ + { "cvtpi2pd", 1, "=x"}, \ + { "cvtpi2pd", 2, "ym"}, \ + { "cvtpi2ps", 1, "=x"}, \ + { "cvtpi2ps", 2, "ym"}, \ + { "cvtps2dq", 1, "=x"}, \ + { "cvtps2dq", 2, "xm"}, \ + { "cvtps2pd", 1, "=x"}, \ + { "cvtps2pd", 2, "xm"}, \ + { "cvtps2pi", 1, "=y"}, \ + { "cvtps2pi", 2, "xm"}, \ + { "cvtsd2si", 1, "=" r32R64}, \ + { "cvtsd2si", 2, "xm"}, \ + { "cvtsd2ss", 1, "=x"}, \ + { "cvtsd2ss", 2, "xm"}, \ + { "cvtsi2sd", 1, "=x"}, \ + { "cvtsi2sd", 2, rm32RM64}, \ + { "cvtsi2ss", 1, "=x"}, \ + { "cvtsi2ss", 2, rm32RM64}, \ + { "cvtss2sd", 1, "=x"}, \ + { "cvtss2sd", 2, "xm"}, \ + { "cvtss2si", 1, "=r"}, \ + { "cvtss2si", 2, "xm"}, \ + { "cvttpd2dq", 1, "=x"}, \ + { "cvttpd2dq", 2, "xm"}, \ + { "cvttpd2pi", 1, "=y"}, \ + { "cvttpd2pi", 2, "xm"}, \ + { "cvttps2dq", 1, "=x"}, \ + { "cvttps2dq", 2, "xm"}, \ + { "cvttps2pi", 1, "=y"}, \ + { "cvttps2pi", 2, "xm"}, \ + { "cvttsd2si", 1, "=r"}, \ + { "cvttsd2si", 2, "xm"}, \ + { "cvttss2si", 1, "=r"}, \ + { "cvttss2si", 2, "xm"}, \ + { "dec", 1, "+" rm8rm16rm32RM64},\ + { "div", 1, rm8rm16rm32}, \ + { "divpd", 1, "+x"}, \ + { "divpd", 2, "xm"}, \ + { "divps", 1, "+x"}, \ + { "divps", 2, "xm"}, \ + { "divsd", 1, "+x"}, \ + { "divsd", 2, "xm"}, \ + { "divss", 1, "+x"}, \ + { "divss", 2, "xm"}, \ + { "enter", 1, "i"}, \ + { "enter", 2, "i"}, \ + { "fadd", 1, "+t,f,@"}, \ + { "fadd", 2, "f,t," m32fpm64fp},\ + { "faddp", 1, "+f"}, \ + { "faddp", 2, "t"}, \ + { "fbld", 1, "m"}, \ + { "fbstp", 1, "m"}, \ + { "fcmovb", 1, "=t"}, \ + { "fcmovb", 2, "f"}, \ + { "fcmovbe", 1, "=t"}, \ + { "fcmovbe", 2, "f"}, \ + { "fcmove", 1, "=t"}, \ + { "fcmove", 2, "f"}, \ + { "fcmovnb", 1, "=t"}, \ + { "fcmovnb", 2, "f"}, \ + { "fcmovnbe", 1, "=t"}, \ + { "fcmovnbe", 2, "f"}, \ + { "fcmovne", 1, "=t"}, \ + { "fcmovne", 2, "f"}, \ + { "fcmovnu", 1, "=t"}, \ + { "fcmovnu", 2, "f"}, \ + { "fcmovu", 1, "=t"}, \ + { "fcmovu", 2, "f"}, \ + { "fcom", 1, "f" m32fpm64fp}, \ + { "fcomi", 1, "t"}, \ + { "fcomi", 2, "f"}, \ + { "fcomip", 1, "t"}, \ + { "fcomip", 2, "f"}, \ + { "fcomp", 1, "f" m32fpm64fp},\ + { "fdiv", 1, "+t,f,@"}, \ + { "fdiv", 2, "f,t," m32fpm64fp},\ + { "fdivp", 1, 
"+f"}, \ + { "fdivp", 2, "t"}, \ + { "fdivr", 1, "+t,@"}, \ + { "fdivr", 2, "f," m32fpm64fp},\ + { "fdivrp", 1, "+f"}, \ + { "fdivrp", 2, "t"}, \ + { "ffree", 1, "f"}, \ + { "fiadd", 1, m16m32}, \ + { "ficom", 1, m16m32}, \ + { "ficomp", 1, m16m32}, \ + { "fidiv", 1, m16m32}, \ + { "fidivr", 1, m16m32}, \ + { "fild", 1, m16m32m64}, \ + { "fimul", 1, m16m32}, \ + { "fist", 1, "=" m16m32}, \ + { "fistp", 1, "=" m16m32m64}, \ + { "fisttp", 1, "=" m16m32m64},\ + { "fisub", 1, m16m32}, \ + { "fisubr", 1, m16m32}, \ + { "fld", 1, "f" m32fpm64fpm80fp},\ + { "fldcw", 1, m16}, \ + { "fldenv", 1, "m"}, \ + { "fldt", 1, "m"}, \ + { "fmul", 1, "=f,t,@"}, \ + { "fmul", 2, "t,f," m32fpm64fp},\ + { "fmulp", 1, "=f"}, \ + { "fmulp", 2, "t"}, \ + { "fnsave", 1, "=m"}, \ + { "fnstcw", 1, "m"}, \ + { "fnstenv", 1, "m"}, \ + { "fnstsw", 1, "ma"}, \ + { "frstor", 1, "m"}, \ + { "fsave", 1, "=m"}, \ + { "fst", 1, "=f" m32fpm64fp}, \ + { "fstcw", 1, "=m"}, \ + { "fstenv", 1, "=m"}, \ + { "fstp", 1, "=f" m32fpm64fpm80fp},\ + { "fstsw", 1, "=ma"}, \ + { "fsub", 1, "=f,t,@"}, \ + { "fsub", 2, "t,f," m32fpm64fp},\ + { "fsubr", 1, "=f,t," m32fpm64fp},\ + { "fsubr", 2, "t,f,@"}, \ + { "fucom", 1, "f"}, \ + { "fucomi", 1, "t"}, \ + { "fucomi", 2, "f"}, \ + { "fucomip", 1, "t"}, \ + { "fucomip", 2, "f"}, \ + { "fucomp", 1, "f"}, \ + { "fxch", 1, "+f" }, \ + { "fxrstor", 1, "m"}, \ + { "fxsave", 1, "=m"}, \ + { "haddpd", 1, "+x"}, \ + { "haddpd", 2, "xm"}, \ + { "haddps", 1, "+x"}, \ + { "haddps", 2, "xm"}, \ + { "hsubpd", 1, "+x"}, \ + { "hsubpd", 2, "xm"}, \ + { "hsubps", 1, "+x"}, \ + { "hsubps", 2, "xm"}, \ + { "idiv", 1, rm8rm16rm32RM64},\ + { "imul", 1, "+r"}, \ + { "imul", 2, "rm"}, \ + { "imul", 3, "i"}, \ + { "in", 1, "=a"}, \ + { "in", 2, "i"}, \ + { "inc", 1, "+" rm8rm16rm32RM64},\ + { NX "ins", 1, "=" m8m16m32}, \ + { NX "ins", 2, "d"}, \ + { "int", 1, "i"}, \ + { "invlpg", 1, "m"}, \ + { "ja", 1, "s"}, \ + { "jae", 1, "s"}, \ + { "jb", 1, "s"}, \ + { "jbe", 1, "s"}, \ + { "jc", 1, "s"}, \ + { NX "jcxz", 1, rel8}, \ + { "je", 1, "s"}, \ + { "jecxz", 1, rel8}, \ + { "jg", 1, "s"}, \ + { "jge", 1, "s"}, \ + { "jl", 1, "s"}, \ + { "jle", 1, "s"}, \ + { NX "jmp", 1, "s" rm32}, \ + { "jna", 1, "s"}, \ + { "jnae", 1, "s"}, \ + { "jnb", 1, "s"}, \ + { "jnc", 1, "s"}, \ + { "jne", 1, "s"}, \ + { "jng", 1, "s"}, \ + { "jnge", 1, "s"}, \ + { "jnl", 1, "s"}, \ + { "jnle", 1, "s"}, \ + { "jno", 1, "s"}, \ + { "jnp", 1, "s"}, \ + { "jns", 1, "s"}, \ + { "jnz", 1, "s"}, \ + { "jo", 1, "s"}, \ + { "jp", 1, "s"}, \ + { "jpe", 1, "s"}, \ + { "jpo", 1, "s"}, \ + { "js", 1, "s"}, \ + { "jz", 1, "s"}, \ + { "lar", 1, "=r"}, \ + { "lar", 2, "rm"}, \ + { "lddqu", 1, "=x"}, \ + { "lddqu", 2, "m"}, \ + { "ldmxcsr", 1, "m"}, \ + { NX "lds", 1, "=" r16 "," r32 C R64},\ + { NX "lds", 2, m16 "," m32 C M64},\ + { "lea", 1, "=r"}, \ + { "lea", 2, "m"}, \ + { NX "les", 1, "=" r16 "," r32 C R64},\ + { NX "les", 2, m16 "," m32 C M64},\ + { "lfs", 1, "=" r16 "," r32 C R64},\ + { "lfs", 2, m16 "," m32 C M64},\ + { "lgdt", 1, "m"}, \ + { "lgs", 1, "=" r16 "," r32 C R64},\ + { "lgs", 2, m16 "," m32 C M64},\ + { "lidt", 1, "m"}, \ + { "lldt", 1, rm16}, \ + { "lmsw", 1, "m"}, \ + { NX "lods", 1, m8m16m32M64}, \ + { "loop", 1, rel8}, \ + { "loope", 1, rel8}, \ + { "loopne", 1, rel8}, \ + { "loopnz", 1, rel8}, \ + { "loopz", 1, rel8}, \ + { "lsl", 1, "=" r16 "," r32}, \ + { "lsl", 2, rm16 "," rm32}, \ + { "lss", 1, "=" r16 "," r32 C R64},\ + { "lss", 2, m16 "," m32 C M64},\ + { "ltr", 1, rm16}, \ + { "maskmovdqu", 1, "x"}, \ + { "maskmovdqu", 2, 
"x"}, \ + { "maskmovq", 1, "y"}, \ + { "maskmovq", 2, "y"}, \ + { "maxpd", 1, "+x"}, \ + { "maxpd", 2, "xm"}, \ + { "maxps", 1, "+x"}, \ + { "maxps", 2, "xm"}, \ + { "maxsd", 1, "+x"}, \ + { "maxsd", 2, "xm"}, \ + { "maxss", 1, "+x"}, \ + { "maxss", 2, "xm"}, \ + { "minpd", 1, "+x"}, \ + { "minpd", 2, "xm"}, \ + { "minps", 1, "+x"}, \ + { "minps", 2, "xm"}, \ + { "minsd", 1, "+x"}, \ + { "minsd", 2, "xm"}, \ + { "minss", 1, "+x"}, \ + { "minss", 2, "xm"}, \ + { "mov", 1, "=" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64}, \ + { "mov", 2, ri8 "," ri16 "," ri32 C RI64 "," rmi8 "," rmi16 "," rmi32 C RMI64}, \ + { "movapd", 1, "=x,xm"}, \ + { "movapd", 2, "xm,x"}, \ + { "movaps", 1, "=x,xm"}, \ + { "movaps", 2, "xm,x"}, \ + { "movd", 1, "=rm,x,y,rm"}, \ + { "movd", 2, "x,rm,rm,y"}, \ + { "movddup", 1, "=x"}, \ + { "movddup", 2, "xm"}, \ + { "movdq2q", 1, "=y"}, \ + { "movdq2q", 2, "x"}, \ + { "movdqa", 1, "=x"}, \ + { "movdqa", 2, "xm"}, \ + { "movdqu", 1, "=x"}, \ + { "movdqu", 2, "xm"}, \ + { "movhlps", 1, "=x"}, \ + { "movhlps", 2, "x"}, \ + { "movhpd", 1, "=x,m"}, \ + { "movhpd", 2, "m,x"}, \ + { "movhps", 1, "=x,m"}, \ + { "movhps", 2, "m,x"}, \ + { "movlhps", 1, "=x"}, \ + { "movlhps", 2, "x"}, \ + { "movlpd", 1, "=x,m"}, \ + { "movlpd", 2, "m,x"}, \ + { "movlps", 1, "=x,m"}, \ + { "movlps", 2, "m,x"}, \ + { "movmskpd", 1, "=r"}, \ + { "movmskpd", 2, "x"}, \ + { "movmskps", 1, "=r"}, \ + { "movmskps", 2, "x"}, \ + { "movntdq", 1, "=m"}, \ + { "movntdq", 2, "x"}, \ + { "movnti", 1, "=m"}, \ + { "movnti", 2, "r"}, \ + { "movntpd", 1, "=m"}, \ + { "movntpd", 2, "x"}, \ + { "movntps", 1, "=m"}, \ + { "movntps", 2, "x"}, \ + { "movntq", 1, "=m"}, \ + { "movntq", 2, "y"}, \ + { "movq", 1, "=x,m,y,m"}, \ + { "movq", 2, "xm,x,ym,y"}, \ + { "movq2dq", 1, "=x"}, \ + { "movq2dq", 2, "y"}, \ + { "movs", 1, "=" m8 "," m16 "," m32 C M64},\ + { "movs", 2, m8 "," m16 "," m32 C M64},\ + { "movsd", 1, "=xm,x"}, \ + { "movsd", 2, "x,xm"}, \ + { "movshdup", 1, "=x"}, \ + { "movshdup", 2, "xm"}, \ + { "movsldup", 1, "=x"}, \ + { "movsldup", 2, "xm"}, \ + { "movss", 1, "=xm,x"}, \ + { "movss", 2, "x,xm"}, \ + { "movsx", 1, "=" r16 "," r32},\ + { "movsx", 2, rm8 "," rm8rm16},\ + { "movupd", 1, "=x,xm"}, \ + { "movupd", 2, "xm,x"}, \ + { "movups", 1, "=x,xm"}, \ + { "movups", 2, "xm,x"}, \ + { "movzx", 1, "=" r16 "," r32},\ + { "movzx", 2, rm8 "," rm8rm16},\ + { "mul", 1, rm8rm16rm32}, \ + { "mulpd", 1, "=x"}, \ + { "mulpd", 2, "xm"}, \ + { "mulps", 1, "=x"}, \ + { "mulps", 2, "xm"}, \ + { "mulsd", 1, "=x"}, \ + { "mulsd", 2, "xm"}, \ + { "mulss", 1, "=x"}, \ + { "mulss", 2, "xm"}, \ + { "neg", 1, "+" rm8rm16rm32}, \ + { "not", 1, "+" rm8rm16rm32}, \ + { "or", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\ + { "or", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\ + { "orpd", 1, "+x"}, \ + { "orpd", 2, "xm"}, \ + { "orps", 1, "+x"}, \ + { "orps", 2, "xm"}, \ + { "out", 1, "id"}, \ + { "out", 2, a8 a16 a32}, \ + { NX "outs", 1, "d"}, \ + { NX "outs", 2, m8m16m32}, \ + { "packssdw", 1, "+x,y"}, \ + { "packssdw", 2, "xm,ym"}, \ + { "packsswb", 1, "+x,y"}, \ + { "packsswb", 2, "xm,ym"}, \ + { "packuswb", 1, "+x,y"}, \ + { "packuswb", 2, "xm,ym"}, \ + { "paddb", 1, "+x,y"}, \ + { "paddb", 2, "xm,ym"}, \ + { "paddd", 1, "+x,y"}, \ + { "paddd", 2, "xm,ym"}, \ + { "paddq", 1, "+x,y"}, \ + { "paddq", 2, "xm,ym"}, \ + { "paddsb", 1, "+x,y"}, \ + { "paddsb", 2, "xm,ym"}, \ + { "paddsw", 1, "+x,y"}, \ + { "paddsw", 2, "xm,ym"}, \ + { "paddusb", 1, "+x,y"}, \ + { "paddusb", 2, 
"xm,ym"}, \ + { "paddusw", 1, "+x,y"}, \ + { "paddusw", 2, "xm,ym"}, \ + { "paddw", 1, "+x,y"}, \ + { "paddw", 2, "xm,ym"}, \ + { "pand", 1, "+x,y"}, \ + { "pand", 2, "xm,ym"}, \ + { "pandn", 1, "+x,y"}, \ + { "pandn", 2, "xm,ym"}, \ + { "pavgb", 1, "+x,y"}, \ + { "pavgb", 2, "xm,ym"}, \ + { "pavgw", 1, "+x,y"}, \ + { "pavgw", 2, "xm,ym"}, \ + { "pcmpeqb", 1, "+x,y"}, \ + { "pcmpeqb", 2, "xm,ym"}, \ + { "pcmpeqd", 1, "+x,y"}, \ + { "pcmpeqd", 2, "xm,ym"}, \ + { "pcmpeqw", 1, "+x,y"}, \ + { "pcmpeqw", 2, "xm,ym"}, \ + { "pcmpgtb", 1, "+x,y"}, \ + { "pcmpgtb", 2, "xm,ym"}, \ + { "pcmpgtd", 1, "+x,y"}, \ + { "pcmpgtd", 2, "xm,ym"}, \ + { "pcmpgtw", 1, "+x,y"}, \ + { "pcmpgtw", 2, "xm,ym"}, \ + { "pextrw", 1, "=" r32R64}, \ + { "pextrw", 2, "xy"}, \ + { "pextrw", 3, "i"}, \ + { "pinsrw", 1, "=xy"}, \ + { "pinsrw", 2, r32R64 "m"}, \ + { "pinsrw", 3, "i"}, \ + { "pmaddwd", 1, "+x,y"}, \ + { "pmaddwd", 2, "xm,ym"}, \ + { "pmaxsw", 1, "+x,y"}, \ + { "pmaxsw", 2, "xm,ym"}, \ + { "pmaxub", 1, "+x,y"}, \ + { "pmaxub", 2, "xm,ym"}, \ + { "pminsw", 1, "+x,y"}, \ + { "pminsw", 2, "xm,ym"}, \ + { "pminub", 1, "+x,y"}, \ + { "pminub", 2, "xm,ym"}, \ + { "pmovmskb", 1, "+" r32R64}, \ + { "pmovmskb", 2, "xy"}, \ + { "pmulhuw", 1, "+x,y"}, \ + { "pmulhuw", 2, "xm,ym"}, \ + { "pmulhw", 1, "+x,y"}, \ + { "pmulhw", 2, "xm,ym"}, \ + { "pmullw", 1, "+x,y"}, \ + { "pmullw", 2, "xm,ym"}, \ + { "pmuludq", 1, "+x,y"}, \ + { "pmuludq", 2, "xm,ym"}, \ + { "pop", 1, rm16 T(rm32) RM64},\ + { "por", 1, "+x,y"}, \ + { "por", 2, "xm,ym"}, \ + { "prefetchnta", 1, "m"}, \ + { "prefetcht0", 1, "m"}, \ + { "prefetcht1", 1, "m"}, \ + { "prefetcht2", 1, "m"}, \ + { "psadbw", 1, "+x,y"}, \ + { "psadbw", 2, "xm,ym"}, \ + { "pshufd", 1, "=x"}, \ + { "pshufd", 2, "xm"}, \ + { "pshufd", 3, "i"}, \ + { "pshufhw", 1, "=x"}, \ + { "pshufhw", 2, "xm"}, \ + { "pshufhw", 3, "i"}, \ + { "pshuflw", 1, "=x"}, \ + { "pshuflw", 2, "xm"}, \ + { "pshuflw", 3, "i"}, \ + { "pshufw", 1, "=y"}, \ + { "pshufw", 2, "ym"}, \ + { "pshufw", 3, "i"}, \ + { "pslld", 1, "+x,y"}, \ + { "pslld", 2, "xmi,ymi"}, \ + { "pslldq", 1, "+x"}, \ + { "pslldq", 2, "i"}, \ + { "psllq", 1, "+x,y"}, \ + { "psllq", 2, "xmi,ymi"}, \ + { "psllw", 1, "+x,y"}, \ + { "psllw", 2, "xmi,ymi"}, \ + { "psrad", 1, "+x,y"}, \ + { "psrad", 2, "xmi,ymi"}, \ + { "psraw", 1, "+x,y"}, \ + { "psraw", 2, "xmi,ymi"}, \ + { "psrld", 1, "+x,y"}, \ + { "psrld", 2, "xmi,ymi"}, \ + { "psrldq", 1, "+x"}, \ + { "psrldq", 2, "i"}, \ + { "psrlq", 1, "+x,y"}, \ + { "psrlq", 2, "xmi,ymi"}, \ + { "psrlw", 1, "+x,y"}, \ + { "psrlw", 2, "xmi,ymi"}, \ + { "psubb", 1, "+x,y"}, \ + { "psubb", 2, "xm,ym"}, \ + { "psubd", 1, "+x,y"}, \ + { "psubd", 2, "xm,ym"}, \ + { "psubq", 1, "+x,y"}, \ + { "psubq", 2, "xm,ym"}, \ + { "psubsb", 1, "+x,y"}, \ + { "psubsb", 2, "xm,ym"}, \ + { "psubsw", 1, "+x,y"}, \ + { "psubsw", 2, "xm,ym"}, \ + { "psubusb", 1, "+x,y"}, \ + { "psubusb", 2, "xm,ym"}, \ + { "psubusw", 1, "+x,y"}, \ + { "psubusw", 2, "xm,ym"}, \ + { "psubw", 1, "+x,y"}, \ + { "psubw", 2, "xm,ym"}, \ + { "punpckhbw", 1, "+x,y"}, \ + { "punpckhbw", 2, "xm,ym"}, \ + { "punpckhdq", 1, "+x,y"}, \ + { "punpckhdq", 2, "xm,ym"}, \ + { "punpckhqdq", 1, "+x"}, \ + { "punpckhqdq", 2, "xm"}, \ + { "punpckhwd", 1, "+x,y"}, \ + { "punpckhwd", 2, "xm,ym"}, \ + { "punpcklbw", 1, "+x,y"}, \ + { "punpcklbw", 2, "xm,ym"}, \ + { "punpckldq", 1, "+x,y"}, \ + { "punpckldq", 2, "xm,ym"}, \ + { "punpcklqdq", 1, "+x"}, \ + { "punpcklqdq", 2, "xm"}, \ + { "punpcklwd", 1, "+x,y"}, \ + { "punpcklwd", 2, "xm,ym"}, \ + { "push", 1, rm16 
T(rm32) RM64 "i"},\ + { "pxor", 1, "+x,y"}, \ + { "pxor", 2, "xm,ym"}, \ + { "rcl", 1, "+" rm8rm16rm32}, \ + { "rcl", 2, "ic"}, \ + { "rcpps", 1, "+x"}, \ + { "rcpps", 2, "xm"}, \ + { "rcpss", 1, "+x"}, \ + { "rcpss", 2, "xm"}, \ + { "rcr", 1, "+" rm8rm16rm32}, \ + { "rcr", 2, "ic"}, \ + { "ret", 1, "i"}, \ + { "rol", 1, "+" rm8rm16rm32}, \ + { "rol", 2, "ic"}, \ + { "ror", 1, "+" rm8rm16rm32}, \ + { "ror", 2, "ic"}, \ + { "rsqrtps", 1, "=x"}, \ + { "rsqrtps", 2, "xm"}, \ + { "rsqrtss", 1, "=x"}, \ + { "rsqrtss", 2, "xm"}, \ + { "sal", 1, "+" rm8rm16rm32}, \ + { "sal", 2, "ic"}, \ + { "sar", 1, "+" rm8rm16rm32}, \ + { "sar", 2, "ic"}, \ + { "sbb", 1, "+" rm8 "," rm16 "," rm32 "," r8 "," r16 "," r32},\ + { "sbb", 2, ri8 "," ri16 "," ri32 "," m8 "," m16 "," m32},\ + { "scas", 1, m8m16m32M64}, \ + { "seta", 1, "=qm"}, \ + { "setae", 1, "=qm"}, \ + { "setb", 1, "=qm"}, \ + { "setbe", 1, "=qm"}, \ + { "setc", 1, "=qm"}, \ + { "sete", 1, "=qm"}, \ + { "setg", 1, "=qm"}, \ + { "setge", 1, "=qm"}, \ + { "setl", 1, "=qm"}, \ + { "setle", 1, "=qm"}, \ + { "setna", 1, "=qm"}, \ + { "setnae", 1, "=qm"}, \ + { "setnb", 1, "=qm"}, \ + { "setnbe", 1, "=qm"}, \ + { "setnc", 1, "=qm"}, \ + { "setne", 1, "=qm"}, \ + { "setng", 1, "=qm"}, \ + { "setnge", 1, "=qm"}, \ + { "setnl", 1, "=qm"}, \ + { "setnle", 1, "=qm"}, \ + { "setno", 1, "=qm"}, \ + { "setnp", 1, "=qm"}, \ + { "setns", 1, "=qm"}, \ + { "setnz", 1, "=qm"}, \ + { "seto", 1, "=qm"}, \ + { "setp", 1, "=qm"}, \ + { "setpe", 1, "=qm"}, \ + { "setpo", 1, "=qm"}, \ + { "sets", 1, "=qm"}, \ + { "setz", 1, "=qm"}, \ + { NY "sgdt", 1, "=m"}, \ + { "shl", 1, "+" rm8rm16rm32}, \ + { "shl", 2, "ic"}, \ + { "shld", 1, "+" rm16 "," rm32 C RM64},\ + { "shld", 2, r16 "," r32 C R64},\ + { "shld", 3, "ic,ic" X(",ic")},\ + { "shr", 1, "+" rm8rm16rm32}, \ + { "shr", 2, "ic"}, \ + { "shrd", 1, "+" rm16 "," rm32 C RM64},\ + { "shrd", 2, r16 "," r32 C R64},\ + { "shrd", 3, "ic,ic" X(",ic")}, \ + { "shufpd", 1, "+x"}, \ + { "shufpd", 2, "xm"}, \ + { "shufpd", 3, "i"}, \ + { "shufps", 1, "+x"}, \ + { "shufps", 2, "xm"}, \ + { "shufps", 3, "i"}, \ + { NY "sidt", 1, "=m"}, \ + { "sldt", 1, "=q" S("2") "m"},\ + { "smsw", 1, "=q" S("2") "m"},\ + { "sqrtpd", 1, "=x"}, \ + { "sqrtpd", 2, "xm"}, \ + { "sqrtps", 1, "=x"}, \ + { "sqrtps", 2, "xm"}, \ + { "sqrtsd", 1, "=x"}, \ + { "sqrtsd", 2, "xm"}, \ + { "sqrtss", 1, "=x"}, \ + { "sqrtss", 2, "xm"}, \ + { "stmxcsr", 1, "m"}, \ + { "stos", 1, "=m"}, \ + { "str", 1, "=q" S("2") "m"},\ + { "sub", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\ + { "sub", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\ + { "subpd", 1, "+x"}, \ + { "subpd", 2, "xm"}, \ + { "subps", 1, "+x"}, \ + { "subps", 2, "xm"}, \ + { "subsd", 1, "+x"}, \ + { "subsd", 2, "xm"}, \ + { "subss", 1, "+x"}, \ + { "subss", 2, "xm"}, \ + { "test", 1, "+r," rm8rm16rm32},\ + { "test", 2, "r,i"}, \ + { "ucomisd", 1, "+x"}, \ + { "ucomisd", 2, "xm"}, \ + { "ucomiss", 1, "+x"}, \ + { "ucomiss", 2, "xm"}, \ + { "unpckhpd", 1, "+x"}, \ + { "unpckhpd", 2, "xm"}, \ + { "unpckhps", 1, "+x"}, \ + { "unpckhps", 2, "xm"}, \ + { "unpcklpd", 1, "+x"}, \ + { "unpcklpd", 2, "xm"}, \ + { "unpcklps", 1, "+x"}, \ + { "unpcklps", 2, "xm"}, \ + { "verr", 1, rm16}, \ + { "verw", 1, rm16}, \ + { "xadd", 1, "+" rm8 "," rm16 "," rm32},\ + { "xadd", 2, r8 "," r16 "," r32},\ + { "xchg", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\ + { "xchg", 2, "+" r8 "," r16 "," r32 C R64 "," m8 "," m16 "," m32 C M64},\ + { "xlat", 1, "m"}, \ + { "xor", 
1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\ + { "xor", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\ + { "xorpd", 1, "+x"}, \ + { "xorpd", 2, "xm"}, \ + { "xorps", 1, "+x"}, \ + { "xorps", 2, "xm"}, + +#define TARGET_IASM_EXTRA_CLOBBERS \ + { "rdtsc", { "edx", "eax"} } + +#define IASM_FUNCTION_MODIFIER "P" + +#define IASM_REGISTER_NAME(STR, BUF) i386_iasm_register_name (STR, BUF) + +/* APPLE LOCAL end CW asm blocks */ + +/* Flag to mark data that is in the large address area. */ +#define SYMBOL_FLAG_FAR_ADDR (SYMBOL_FLAG_MACH_DEP << 0) +#define SYMBOL_REF_FAR_ADDR_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_FAR_ADDR) != 0) +/* +Local variables: +version-control: t +End: +*/ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.md b/gcc-4.2.1-5666.3/gcc/config/i386/i386.md new file mode 100644 index 000000000..e825a0474 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.md @@ -0,0 +1,21399 @@ +;; GCC machine description for IA-32 and x86-64. +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003, 2004, 2005, 2006 +;; Free Software Foundation, Inc. +;; Mostly by William Schelter. +;; x86_64 support added by Jan Hubicka +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. */ +;; +;; The original PO technology requires these to be ordered by speed, +;; so that assigner will pick the fastest. +;; +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. +;; +;; Macro REG_CLASS_FROM_LETTER in file i386.h defines the register +;; constraint letters. +;; +;; The special asm out single letter directives following a '%' are: +;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of +;; operands[1]. +;; 'L' Print the opcode suffix for a 32-bit integer opcode. +;; 'W' Print the opcode suffix for a 16-bit integer opcode. +;; 'B' Print the opcode suffix for an 8-bit integer opcode. +;; 'Q' Print the opcode suffix for a 64-bit float opcode. +;; 'S' Print the opcode suffix for a 32-bit float opcode. +;; 'T' Print the opcode suffix for an 80-bit extended real XFmode float opcode. +;; 'J' Print the appropriate jump operand. +;; +;; 'b' Print the QImode name of the register for the indicated operand. +;; %b0 would print %al if operands[0] is reg 0. +;; 'w' Likewise, print the HImode name of the register. +;; 'k' Likewise, print the SImode name of the register. +;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh. +;; 'y' Print "st(0)" instead of "st" as a register. 
+ +;; UNSPEC usage: + +(define_constants + [; Relocation specifiers + (UNSPEC_GOT 0) + (UNSPEC_GOTOFF 1) + (UNSPEC_GOTPCREL 2) + (UNSPEC_GOTTPOFF 3) + (UNSPEC_TPOFF 4) + (UNSPEC_NTPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_GOTNTPOFF 7) + (UNSPEC_INDNTPOFF 8) + + ; Prologue support + (UNSPEC_STACK_ALLOC 11) + (UNSPEC_SET_GOT 12) + (UNSPEC_SSE_PROLOGUE_SAVE 13) + (UNSPEC_REG_SAVE 14) + (UNSPEC_DEF_CFA 15) + + ; TLS support + (UNSPEC_TP 16) + (UNSPEC_TLS_GD 17) + (UNSPEC_TLS_LD_BASE 18) + (UNSPEC_TLSDESC 19) + + ; Other random patterns + (UNSPEC_SCAS 20) + (UNSPEC_FNSTSW 21) + (UNSPEC_SAHF 22) + (UNSPEC_FSTCW 23) + (UNSPEC_ADD_CARRY 24) + (UNSPEC_FLDCW 25) + (UNSPEC_REP 26) + (UNSPEC_EH_RETURN 27) + (UNSPEC_LD_MPIC 28) ; load_macho_picbase + + ; For SSE/MMX support: + (UNSPEC_FIX_NOTRUNC 30) + (UNSPEC_MASKMOV 31) + (UNSPEC_MOVMSK 32) + (UNSPEC_MOVNT 33) + (UNSPEC_MOVU 34) + (UNSPEC_RCP 35) + (UNSPEC_RSQRT 36) + (UNSPEC_SFENCE 37) + (UNSPEC_NOP 38) ; prevents combiner cleverness + (UNSPEC_PFRCP 39) + (UNSPEC_PFRCPIT1 40) + (UNSPEC_PFRCPIT2 41) + (UNSPEC_PFRSQRT 42) + (UNSPEC_PFRSQIT1 43) + (UNSPEC_MFENCE 44) + (UNSPEC_LFENCE 45) + (UNSPEC_PSADBW 46) + (UNSPEC_LDQQU 47) + ; APPLE LOCAL begin 4121692 + (UNSPEC_LDQ 201) + (UNSPEC_MOVQ 202) + (UNSPEC_STOQ 203) + ; APPLE LOCAL end 4121692 + + ; Generic math support + (UNSPEC_COPYSIGN 50) + (UNSPEC_IEEE_MIN 51) ; not commutative + (UNSPEC_IEEE_MAX 52) ; not commutative + + ; x87 Floating point + (UNSPEC_SIN 60) + (UNSPEC_COS 61) + (UNSPEC_FPATAN 62) + (UNSPEC_FYL2X 63) + (UNSPEC_FYL2XP1 64) + (UNSPEC_FRNDINT 65) + (UNSPEC_FIST 66) + (UNSPEC_F2XM1 67) + + ; x87 Rounding + (UNSPEC_FRNDINT_FLOOR 70) + (UNSPEC_FRNDINT_CEIL 71) + (UNSPEC_FRNDINT_TRUNC 72) + (UNSPEC_FRNDINT_MASK_PM 73) + (UNSPEC_FIST_FLOOR 74) + (UNSPEC_FIST_CEIL 75) + ; APPLE LOCAL 3399553 + (UNSPEC_FLT_ROUNDS 76) + + ; x87 Double output FP + (UNSPEC_SINCOS_COS 80) + (UNSPEC_SINCOS_SIN 81) + (UNSPEC_TAN_ONE 82) + (UNSPEC_TAN_TAN 83) + (UNSPEC_XTRACT_FRACT 84) + (UNSPEC_XTRACT_EXP 85) + (UNSPEC_FSCALE_FRACT 86) + (UNSPEC_FSCALE_EXP 87) + (UNSPEC_FPREM_F 88) + (UNSPEC_FPREM_U 89) + (UNSPEC_FPREM1_F 90) + (UNSPEC_FPREM1_U 91) + + ; SSP patterns + (UNSPEC_SP_SET 100) + (UNSPEC_SP_TEST 101) + (UNSPEC_SP_TLS_SET 102) + (UNSPEC_SP_TLS_TEST 103) + ; APPLE LOCAL begin mainline + ; SSSE3 + (UNSPEC_PSHUFB 220) + (UNSPEC_PSIGN 221) + (UNSPEC_PALIGNR 222) + ; APPLE LOCAL end mainline + ; APPLE LOCAL begin 5612787 mainline sse4 + ; For SSE4A support + (UNSPEC_EXTRQI 130) + (UNSPEC_EXTRQ 131) + (UNSPEC_INSERTQI 132) + (UNSPEC_INSERTQ 133) + + ; For SSE4.1 support + (UNSPEC_BLENDV 134) + (UNSPEC_INSERTPS 135) + (UNSPEC_DP 136) + (UNSPEC_MOVNTDQA 137) + (UNSPEC_MPSADBW 138) + (UNSPEC_PHMINPOSUW 139) + (UNSPEC_PTEST 140) + (UNSPEC_ROUND 141) + + ; For SSE4.2 support + (UNSPEC_CRC32 143) + (UNSPEC_PCMPESTR 144) + (UNSPEC_PCMPISTR 145) + ; APPLE LOCAL end 5612787 mainline sse4 + ]) + +(define_constants + [(UNSPECV_BLOCKAGE 0) + (UNSPECV_STACK_PROBE 1) + (UNSPECV_EMMS 2) + (UNSPECV_LDMXCSR 3) + (UNSPECV_STMXCSR 4) + (UNSPECV_FEMMS 5) + (UNSPECV_CLFLUSH 6) + (UNSPECV_ALIGN 7) + (UNSPECV_MONITOR 8) + (UNSPECV_MWAIT 9) + (UNSPECV_CMPXCHG_1 10) + (UNSPECV_CMPXCHG_2 11) + (UNSPECV_XCHG 12) + (UNSPECV_LOCK 13) + ]) + +;; Registers by name. +(define_constants + [(BP_REG 6) + (SP_REG 7) + (FLAGS_REG 17) + (FPSR_REG 18) + (DIRFLAG_REG 19) + ]) + +;; Insns whose names begin with "x86_" are emitted by gen_FOO calls +;; from i386.c. + +;; In C guard expressions, put expressions which may be compile-time +;; constants first. 
This allows for better optimization. For +;; example, write "TARGET_64BIT && reload_completed", not +;; "reload_completed && TARGET_64BIT". + + +;; Processor type. This attribute must exactly match the processor_type +;; enumeration in i386.h. +; APPLE LOCAL mainline 2006-04-19 4434601 +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64" + (const (symbol_ref "ix86_tune"))) + +;; A basic instruction type. Refinements due to arguments to be +;; provided in other attributes. +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_attr "type" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + icmp,test,ibr,setcc,icmov, + push,pop,call,callv,leave, + str,cld, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, + sselog,sselog1,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" + (const_string "other")) +;; APPLE LOCAL end 5612787 mainline sse4 + +;; Main data type used by the insn +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF,V1DF" + (const_string "unknown")) + +;; The CPU unit operations uses. +(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") + (const_string "i387") + (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) + +;; The (bounding maximum) length of an instruction immediate. +(define_attr "length_immediate" "" + (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave") + (const_int 0) + (eq_attr "unit" "i387,sse,mmx") + (const_int 0) + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, + imul,icmp,push,pop") + (symbol_ref "ix86_attr_length_immediate_default(insn,1)") + (eq_attr "type" "imov,test") + (symbol_ref "ix86_attr_length_immediate_default(insn,0)") + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + ;; We don't know the size before shorten_branches. Expect + ;; the instruction to fit for better scheduling. + (eq_attr "type" "ibr") + (const_int 1) + ] + (symbol_ref "/* Update immediate_length and other attributes! */ + gcc_unreachable (),1"))) + +;; The (bounding maximum) length of an instruction address. +(define_attr "length_address" "" + (cond [(eq_attr "type" "str,cld,other,multi,fxch") + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + ] + (symbol_ref "ix86_attr_length_address_default (insn)"))) + +;; Set when length prefix is used. +(define_attr "prefix_data16" "" + (if_then_else (ior (eq_attr "mode" "HI") + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF"))) + (const_int 1) + (const_int 0))) + +;; Set when string REP prefix is used. 
+(define_attr "prefix_rep" "" + (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + (const_int 0))) + +;; Set when 0f opcode prefix is used. +(define_attr "prefix_0f" "" + (if_then_else + (ior (eq_attr "type" "imovx,setcc,icmov") + (eq_attr "unit" "sse,mmx")) + (const_int 1) + (const_int 0))) + +;; Set when REX opcode prefix is used. +(define_attr "prefix_rex" "" + (cond [(and (eq_attr "mode" "DI") + (eq_attr "type" "!push,pop,call,callv,leave,ibr")) + (const_int 1) + (and (eq_attr "mode" "QI") + (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)") + (const_int 0))) + (const_int 1) + (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)") + (const_int 0)) + (const_int 1) + ] + (const_int 0))) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; There are also additional prefixes in SSSE3. +(define_attr "prefix_extra" "" (const_int 0)) +;; APPLE LOCAL end 5612787 mainline sse4 + +;; Set when modrm byte is used. +(define_attr "modrm" "" + (cond [(eq_attr "type" "str,cld,leave") + (const_int 0) + (eq_attr "unit" "i387") + (const_int 0) + (and (eq_attr "type" "incdec") + (ior (match_operand:SI 1 "register_operand" "") + (match_operand:HI 1 "register_operand" ""))) + (const_int 0) + (and (eq_attr "type" "push") + (not (match_operand 1 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "pop") + (not (match_operand 0 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "imov") + (ior (and (match_operand 0 "register_operand" "") + (match_operand 1 "immediate_operand" "")) + (ior (and (match_operand 0 "ax_reg_operand" "") + (match_operand 1 "memory_displacement_only_operand" "")) + (and (match_operand 0 "memory_displacement_only_operand" "") + (match_operand 1 "ax_reg_operand" ""))))) + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + ] + (const_int 1))) + +;; The (bounding maximum) length of an instruction in bytes. +;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. +;; Later we may want to split them and compute proper length as for +;; other insns. +(define_attr "length" "" + (cond [(eq_attr "type" "other,multi,fistp,frndint") + (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr "length_address")))] + (plus (plus (attr "modrm") + (plus (attr "prefix_0f") + (plus (attr "prefix_rex") + (const_int 1)))) + (plus (attr "prefix_rep") + (plus (attr "prefix_data16") + (plus (attr "length_immediate") + (attr "length_address"))))))) + +;; The `memory' attribute is `none' if no memory is referenced, `load' or +;; `store' if there is a simple memory reference therein, or `unknown' +;; if the instruction is complex. 
+ +(define_attr "memory" "none,load,store,both,unknown" + (cond [(eq_attr "type" "other,multi,str") + (const_string "unknown") + (eq_attr "type" "lea,fcmov,fpspc,cld") + (const_string "none") + (eq_attr "type" "fistp,leave") + (const_string "both") + (eq_attr "type" "frndint") + (const_string "load") + (eq_attr "type" "push") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "both") + (const_string "store")) + (eq_attr "type" "pop") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "both") + (const_string "load")) + (eq_attr "type" "setcc") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "store") + (const_string "none")) + (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp") + (if_then_else (ior (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "load") + (const_string "none")) + (eq_attr "type" "ibr") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "load") + (const_string "none")) + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (and (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (match_operand 0 "memory_operand" "") + (const_string "store") + (match_operand 1 "memory_operand" "") + (const_string "load") + (and (eq_attr "type" + "!alu1,negnot,ishift1, + imov,imovx,icmp,test, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, + mmx,mmxmov,mmxcmp,mmxcvt") + (match_operand 2 "memory_operand" "")) + (const_string "load") + (and (eq_attr "type" "icmov") + (match_operand 3 "memory_operand" "")) + (const_string "load") + ] + (const_string "none"))) + +;; Indicates if an instruction has both an immediate and a displacement. + +(define_attr "imm_disp" "false,true,unknown" + (cond [(eq_attr "type" "other,multi") + (const_string "unknown") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "true") + (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 2 "immediate_operand" ""))) + (const_string "true") + ] + (const_string "false"))) + +;; Indicates if an FP operation has an integer source. + +(define_attr "fp_int_src" "false,true" + (const_string "false")) + +;; Defines rounding mode of an FP operation. + +(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" + (const_string "any")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "length" "128") + (set_attr "type" "multi")]) + +;; All x87 floating point modes +(define_mode_macro X87MODEF [SF DF XF]) + +;; All integer modes handled by x87 fisttp operator. +(define_mode_macro X87MODEI [HI SI DI]) + +;; All integer modes handled by integer x87 operators. +(define_mode_macro X87MODEI12 [HI SI]) + +;; All SSE floating point modes +(define_mode_macro SSEMODEF [SF DF]) + +;; All integer modes handled by SSE cvtts?2si* operators. 
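+;; (cvttss2si and cvttsd2si can target either a 32-bit or, with a REX.W
+;; prefix, a 64-bit general register, hence SI and DI below.)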
+(define_mode_macro SSEMODEI24 [SI DI]) + + +;; Scheduling descriptions + +(include "pentium.md") +(include "ppro.md") +(include "k6.md") +(include "athlon.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; Compare instructions. + +;; All compare insns have expanders that save the operands away without +;; actually generating RTL. The bCOND or sCOND (emitted immediately +;; after the cmp) will actually emit the cmpM. + +(define_expand "cmpti" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "x86_64_general_operand" "")))] + "TARGET_64BIT" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (TImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpdi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "x86_64_general_operand" "")))] + "" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (DImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpsi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SI 0 "cmpsi_operand" "") + (match_operand:SI 1 "general_operand" "")))] + "" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (SImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmphi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" "")))] + "" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (HImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpqi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" "")))] + "TARGET_QIMODE_MATH" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (QImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_insn "cmpdi_ccno_1_rex64" + [(set (reg FLAGS_REG) + (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr") + (match_operand:DI 1 "const0_operand" "n,n")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "@ + test{q}\t{%0, %0|%0, %0} + cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "DI")]) + +(define_insn "*cmpdi_minus_1_rex64" + [(set (reg FLAGS_REG) + (compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r") + (match_operand:DI 1 "x86_64_general_operand" "re,mr")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)" + "cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "DI")]) + +(define_expand "cmpdi_1_rex64" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" "")))] + "TARGET_64BIT" + "") + +(define_insn "cmpdi_1_insn_rex64" + [(set (reg FLAGS_REG) + (compare (match_operand:DI 0 "nonimmediate_operand" "mr,r") + (match_operand:DI 1 "x86_64_general_operand" "re,mr")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{q}\t{%1, 
%0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "DI")]) + + +(define_insn "*cmpsi_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr") + (match_operand:SI 1 "const0_operand" "n,n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{l}\t{%0, %0|%0, %0} + cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "SI")]) + +(define_insn "*cmpsi_minus_1" + [(set (reg FLAGS_REG) + (compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "SI")]) + +(define_expand "cmpsi_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")))] + "" + "") + +(define_insn "*cmpsi_1_insn" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCmode)" + "cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "SI")]) + +(define_insn "*cmphi_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr") + (match_operand:HI 1 "const0_operand" "n,n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{w}\t{%0, %0|%0, %0} + cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "HI")]) + +(define_insn "*cmphi_minus_1" + [(set (reg FLAGS_REG) + (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r") + (match_operand:HI 1 "general_operand" "ri,mr")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "HI")]) + +(define_insn "*cmphi_1" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r") + (match_operand:HI 1 "general_operand" "ri,mr")))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCmode)" + "cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "HI")]) + +(define_insn "*cmpqi_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq") + (match_operand:QI 1 "const0_operand" "n,n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{b}\t{%0, %0|%0, %0} + cmp{b}\t{$0, %0|%0, 0}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_1" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q") + (match_operand:QI 1 "general_operand" "qi,mq")))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_minus_1" + [(set (reg FLAGS_REG) + (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q") + (match_operand:QI 1 "general_operand" "qi,mq")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "general_operand" "Qm") + (subreg:QI + (zero_extract:SI + (match_operand 1 
"ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "register_operand" "Q") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "const0_operand" "n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t%h0, %h0" + [(set_attr "type" "test") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_expand "cmpqi_ext_3" + [(set (reg:CC FLAGS_REG) + (compare:CC + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "")))] + "" + "") + +(define_insn "cmpqi_ext_3_insn" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "Qmn")))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "cmpqi_ext_3_insn_rex64" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "nonmemory_operand" "Qn")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_4" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +;; These implement float point compares. +;; %%% See if we can get away with VOIDmode operands on the actual insns, +;; which would allow mix and match FP modes on the compares. Which is what +;; the old patterns did, but with many more of them. + +(define_expand "cmpxf" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 0 "nonmemory_operand" "") + (match_operand:XF 1 "nonmemory_operand" "")))] + "TARGET_80387" +{ + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpdf" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "") + (match_operand:DF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpsf" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "") + (match_operand:SF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" +{ + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +;; FP compares, step 1: +;; Set the FP condition codes. 
+;; +;; CCFPmode compare with exceptions +;; CCFPUmode compare with no exceptions + +;; We may not use "#" to split and emit these, since the REG_DEAD notes +;; used to manage the reg stack popping would not be preserved. + +(define_insn "*cmpfp_0" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "X"))] + UNSPEC_FNSTSW))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn "*cmpfp_sf" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "SF")]) + +(define_insn "*cmpfp_df" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "DF")]) + +(define_insn "*cmpfp_xf" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_u" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "* return output_fp_compare (insn, operands, 0, 1);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn "*cmpfp_<mode>" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))] + UNSPEC_FNSTSW))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +;; FP compares, step 2 +;; Move the fpsw to ax. + +(define_insn "x86_fnstsw_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] + "TARGET_80387" + "fnstsw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "SI") + (set_attr "unit" "i387")]) + +;; FP compares, step 3 +;; Get ax into flags, general case. 
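+;;
+;; Taken together, steps 1-3 give the classic i387 compare idiom used
+;; when fcomi is not available, roughly:
+;;
+;;	fcomp	%st(1)		;; step 1: compare, set FP status word
+;;	fnstsw	%ax		;; step 2: copy the status word into %ax
+;;	sahf			;; step 3: copy %ah into EFLAGS
+;;
+;; after which an ordinary jcc or setcc can test the result.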
+ +(define_insn "x86_sahf_1" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))] + "!TARGET_64BIT" + "sahf" + [(set_attr "length" "1") + (set_attr "athlon_decode" "vector") + (set_attr "mode" "SI")]) + +;; Pentium Pro can do steps 1 through 3 in one go. + +(define_insn "*cmpfp_i_mixed" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "fcmp,ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_i_sse" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "x") + (match_operand 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_i_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE + && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) + && FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_iu_mixed" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp,ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_iu_sse" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "x") + (match_operand 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_iu_387" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE + && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) + && FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp") + (set (attr "mode") 
+ (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector")]) + +;; Move instructions. + +;; General case of fullword move. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "ix86_expand_move (SImode, operands); DONE;") + +;; Push/pop instructions. They are separate since autoinc/dec is not a +;; general_operand. +;; +;; %%% We don't use a post-inc memory reference because x86 is not a +;; general AUTO_INC_DEC host, which impacts how it is treated in flow. +;; Changing this impacts compiler performance on other non-AUTO_INC_DEC +;; targets without our curiosities, and it is just as easy to represent +;; this differently. + +(define_insn "*pushsi2" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes. +(define_insn "*pushsi2_rex64" + [(set (match_operand:SI 0 "push_operand" "=X") + (match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*pushsi2_prologue" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m")) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*popsi1_epilogue" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "pop{l}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "SI")]) + +(define_insn "popsi1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (const_int 4)))] + "!TARGET_64BIT" + "pop{l}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "SI")]) + +(define_insn "*movsi_xor" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "const0_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" + "xor{l}\t{%0, %0|%0, %0}" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsi_or" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "immediate_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && operands[1] == constm1_rtx + && (TARGET_PENTIUM || optimize_size)" +{ + operands[1] = constm1_rtx; + return "or{l}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "1")]) + +(define_insn "*movsi_1" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:SI 0 "nonimmediate_operand" + "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") + (match_operand:SI 1 "general_operand" + "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] +;; APPLE LOCAL end 5612787 mainline sse4 + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + return "xorps\t%0, %0"; + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_V4SF: + 
return "movaps\t{%1, %0|%0, %1}"; + case MODE_SI: + return "movd\t{%1, %0|%0, %1}"; + case MODE_SF: + return "movss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + case TYPE_MMXADD: + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) + return "movq\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_LEA: + return "lea{l}\t{%1, %0|%0, %1}"; + + default: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + return "mov{l}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "mmxadd") + (eq_attr "alternative" "3,4,5") + (const_string "mmxmov") + (eq_attr "alternative" "6") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,11") + (const_string "ssemov") + (match_operand:DI 1 "pic_32bit_operand" "") + (const_string "lea") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (if_then_else + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (and (eq_attr "alternative" "8,9,10,11") + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (const_string "SF") + ] + (const_string "SI")))]) + +;; Stores and loads of ax to arbitrary constant address. +;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabssi_1_rex64" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{l}\t{%1, %P0|%P0, %1} + mov{l}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*movabssi_2_rex64" + [(set (match_operand:SI 0 "register_operand" "=a,r") + (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{l}\t{%P1, %0|%0, %P1} + mov{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "SI")]) + +(define_insn "*swapsi" + [(set (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "" + "xchg{l}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "ix86_expand_move (HImode, operands); DONE;") + +(define_insn "*pushhi2" + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] + "!TARGET_64BIT" + "push{l}\t%k1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes. 
+(define_insn "*pushhi2_rex64" + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "ri"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +(define_insn "*movhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(ne (symbol_ref "optimize_size") (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "imov") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "SI") + ] + (const_string "HI")))]) + +;; Stores and loads of ax to arbitrary constant address. 
+;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabshi_1_rex64" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{w}\t{%1, %P0|%P0, %1} + mov{w}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "*movabshi_2_rex64" + [(set (match_operand:HI 0 "register_operand" "=a,r") + (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{w}\t{%P1, %0|%0, %P1} + mov{w}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "HI")]) + +(define_insn "*swaphi_1" + [(set (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "!TARGET_PARTIAL_REG_STALL || optimize_size" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_insn "*swaphi_2" + [(set (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{w}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "HI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movstricthi" + [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "")) + (match_operand:HI 1 "general_operand" ""))] + "! TARGET_PARTIAL_REG_STALL || optimize_size" +{ + /* Don't generate memory->memory moves, go through a register */ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (HImode, operands[1]); +}) + +(define_insn "*movstricthi_1" + [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r")) + (match_operand:HI 1 "general_operand" "rn,m"))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "mov{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "HI")]) + +(define_insn "*movstricthi_xor" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (match_operand:HI 1 "const0_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ((!TARGET_USE_MOV0 && !TARGET_PARTIAL_REG_STALL) || optimize_size)" + "xor{w}\t{%0, %0|%0, %0}" + [(set_attr "type" "alu1") + (set_attr "mode" "HI") + (set_attr "length_immediate" "0")]) + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "ix86_expand_move (QImode, operands); DONE;") + +;; emit_push_insn when it calls move_by_pieces requires an insn to +;; "push a byte". But actually we use pushl, which has the effect +;; of rounding the amount pushed up to a word. + +(define_insn "*pushqi2" + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))] + "!TARGET_64BIT" + "push{l}\t%k1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes. 
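+;; (Likewise for QImode: the byte is pushed as the full 64-bit register
+;; containing it, keeping the stack slot 8 bytes wide.)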
+(define_insn "*pushqi2_rex64" + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "qi"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +;; Situation is quite tricky about when to choose full sized (SImode) move +;; over QImode moves. For Q_REG -> Q_REG move we use full size only for +;; partial register dependency machines (such as AMD Athlon), where QImode +;; moves issue extra dependency and for partial register stalls machines +;; that don't use QImode patterns (and QImode move cause stall on the next +;; instruction). +;; +;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial +;; register stall machines with, where we use QImode instructions, since +;; partial register stall can be caused there. Then we use movzx. +(define_insn "*movqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") + (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM); + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand" ""))) + (const_string "imovx") + (ne (symbol_ref "optimize_size") (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "3") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (eq_attr "alternative" "3,5") + (const_string "imovx") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,5") + (const_string "SI") + (eq_attr "alternative" "6") + (const_string "QI") + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (and (eq (symbol_ref "optimize_size") + (const_int 0)) + (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)))))) + (const_string "SI") + ;; Avoid partial register stalls when not using QImode arithmetic + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0))))) + (const_string "SI") + ] + (const_string "QI")))]) + +(define_expand "reload_outqi" + [(parallel [(match_operand:QI 0 "" "=m") + (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "=&q")])] + "" +{ + rtx op0, op1, op2; + op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; + + gcc_assert (!reg_overlap_mentioned_p (op2, op0)); + if (! 
q_regs_operand (op1, QImode)) + { + emit_insn (gen_movqi (op2, op1)); + op1 = op2; + } + emit_insn (gen_movqi (op0, op1)); + DONE; +}) + +(define_insn "*swapqi_1" + [(set (match_operand:QI 0 "register_operand" "+r") + (match_operand:QI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "!TARGET_PARTIAL_REG_STALL || optimize_size" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_insn "*swapqi_2" + [(set (match_operand:QI 0 "register_operand" "+q") + (match_operand:QI 1 "register_operand" "+q")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{b}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "QI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movstrictqi" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (match_operand:QI 1 "general_operand" ""))] + "! TARGET_PARTIAL_REG_STALL || optimize_size" +{ + /* Don't generate memory->memory moves, go through a register. */ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*movstrictqi_1" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (match_operand:QI 1 "general_operand" "*qn,m"))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "mov{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +; APPLE LOCAL begin radar 4645709 5131847 +; It is based on movstrictqi_xor where partial register update is performed. +; If optimize_size is not set, it is better to update the whole register. 
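+; For example, clearing %al with this pattern is emitted as the
+; full-width
+;	andl	$0xffffff00, %eax
+; (an and{q} of the corresponding mask on 64-bit targets), which rewrites
+; the whole register and thus avoids a partial-register dependency, at
+; the cost of a longer encoding than xorb %al, %al.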
+(define_insn "*movstrictqi_and" + [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) + (match_operand:QI 1 "const0_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && (!TARGET_USE_MOV0 && !optimize_size)" +{ + if (TARGET_64BIT) + return "and{q}\t{$0xffffffffffffff00, %q0}"; + else + return "and{l}\t{$0xffffff00, %k0}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) +; APPLE LOCAL end radar 4645709 5131847 + +(define_insn "*movstrictqi_xor" + [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) + (match_operand:QI 1 "const0_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" + "xor{b}\t{%0, %0|%0, %0}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsi_extv_1" + [(set (match_operand:SI 0 "register_operand" "=R") + (sign_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movhi_extv_1" + [(set (match_operand:HI 0 "register_operand" "=R") + (sign_extract:HI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extv_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extv_1_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +;; Stores and loads of ax to arbitrary constant address. 
+;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabsqi_1_rex64" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{b}\t{%1, %P0|%P0, %1} + mov{b}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "*movabsqi_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=a,r") + (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{b}\t{%P1, %0|%0, %P1} + mov{b}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "QI")]) + +(define_insn "*movdi_extzv_1" + [(set (match_operand:DI 0 "register_operand" "=R") + (zero_extract:DI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "*movsi_extzv_1" + [(set (match_operand:SI 0 "register_operand" "=R") + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movz{bl|x}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extzv_2" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") + (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extzv_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "movsi_insv_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "general_operand" "Qmn"))] + "!TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "movdi_insv_1_rex64" + [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:DI 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + 
[(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movqi_insv_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + "ix86_expand_move (DImode, operands); DONE;") + +(define_insn "*pushdi" + [(set (match_operand:DI 0 "push_operand" "=<") + (match_operand:DI 1 "general_no_elim_operand" "riF*m"))] + "!TARGET_64BIT" + "#") + +(define_insn "*pushdi2_rex64" + [(set (match_operand:DI 0 "push_operand" "=<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + "TARGET_64BIT" + "@ + push{q}\t%1 + #" + [(set_attr "type" "push,multi") + (set_attr "mode" "DI")]) + +;; Convert impossible pushes of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, push sign extended lower part first and then overwrite +;; upper part by 32bit move. +(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. +(define_peephole2 + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + "split_di (operands + 1, 1, operands + 2, operands + 3); + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); + ") + +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
flow2_completed : reload_completed) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + "split_di (operands + 1, 1, operands + 2, operands + 3); + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); + ") + +(define_insn "*pushdi2_prologue_rex64" + [(set (match_operand:DI 0 "push_operand" "=<") + (match_operand:DI 1 "general_no_elim_operand" "re*m")) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "push{q}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +(define_insn "*popdi1_epilogue_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") + (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) + (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "pop{q}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "DI")]) + +(define_insn "popdi1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") + (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) + (plus:DI (reg:DI SP_REG) (const_int 8)))] + "TARGET_64BIT" + "pop{q}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "DI")]) + +(define_insn "*movdi_xor_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "const0_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (!TARGET_USE_MOV0 || optimize_size) + && reload_completed" + "xor{l}\t{%k0, %k0|%k0, %k0}" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movdi_or_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "const_int_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_PENTIUM || optimize_size) + && reload_completed + && operands[1] == constm1_rtx" +{ + operands[1] = constm1_rtx; + return "or{q}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "DI") + (set_attr "length_immediate" "1")]) + +(define_insn "*movdi_2" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,*y,m*y,*y,*Yt,m ,*Yt,*Yt,*x,m ,*x,*x") + (match_operand:DI 1 "general_operand" + "riFo,riF,C ,*y ,m ,C ,*Yt,*Yt,m ,C ,*x,*x,m "))] +;; APPLE LOCAL end 5612787 mainline sse4 + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + # + # + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) + +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; %%% This multiword shite has got to go. 
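+;;
+;; (The splitters below decompose a DImode move on 32-bit targets into a
+;; pair of SImode moves; ix86_split_long_move also orders the two halves
+;; safely when source and destination overlap, and turns a push into two
+;; pushes.)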
+(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0])) + && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movdi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r ,r,m ,!m,*y,*y,?rm,?*y,*x,*x,?rm,?*x,?*x,?*y") + (match_operand:DI 1 "general_operand" + "Z ,rem,i,re,n ,C ,*y,*y ,rm ,C ,*x,*x ,rm ,*y ,*x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSECVT: + if (which_alternative == 13) + return "movq2dq\t{%1, %0|%0, %1}"; + else + return "movdq2q\t{%1, %0|%0, %1}"; + case TYPE_SSEMOV: + if (get_attr_mode (insn) == MODE_TI) + return "movdqa\t{%1, %0|%0, %1}"; + /* FALLTHRU */ + case TYPE_MMXMOV: + /* Moves from and into integer register is done using movd opcode with + REX prefix. */ + if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) + return "movd\t{%1, %0|%0, %1}"; + return "movq\t{%1, %0|%0, %1}"; + case TYPE_SSELOG1: + case TYPE_MMXADD: + return "pxor\t%0, %0"; + case TYPE_MULTI: + return "#"; + case TYPE_LEA: + return "lea{q}\t{%a1, %0|%0, %a1}"; + default: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else if (which_alternative == 2) + return "movabs{q}\t{%1, %0|%0, %1}"; + else + return "mov{q}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "5") + (const_string "mmxadd") + (eq_attr "alternative" "6,7,8") + (const_string "mmxmov") + (eq_attr "alternative" "9") + (const_string "sselog1") + (eq_attr "alternative" "10,11,12") + (const_string "ssemov") + (eq_attr "alternative" "13,14") + (const_string "ssecvt") + (eq_attr "alternative" "4") + (const_string "multi") + (match_operand:DI 1 "pic_32bit_operand" "") + (const_string "lea") + ] + (const_string "imov"))) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI")]) + +;; Stores and loads of ax to arbitrary constant address. +;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabsdi_1_rex64" + [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:DI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{q}\t{%1, %P0|%P0, %1} + mov{q}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*movabsdi_2_rex64" + [(set (match_operand:DI 0 "register_operand" "=a,r") + (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{q}\t{%P1, %0|%0, %P1} + mov{q}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "DI")]) + +;; Convert impossible stores of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, move by 32bit parts. 
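+;;
+;; ("Impossible" means the immediate is not a sign-extended 32-bit value:
+;; x86_64 has no mov m64, imm64 encoding.  So a store of, say,
+;; 0x100000001 becomes either a movabs into a scratch register followed
+;; by a register store, or two 32-bit immediate stores, one per half.)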
+(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. +(define_peephole2 + [(set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "split_di (operands, 2, operands + 2, operands + 4);") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "split_di (operands, 2, operands + 2, operands + 4);") + +(define_insn "*swapdi_rex64" + [(set (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_64BIT" + "xchg{q}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "DI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] + "TARGET_SSE || TARGET_64BIT" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) + +(define_insn "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (ne (symbol_ref "optimize_size") (const_int 0))) + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0))) + (const_string "V4SF")] + (const_string "TI")))]) + +(define_insn "*movti_rex64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr 
"alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "ix86_expand_move (SFmode, operands); DONE;") + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] + "!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,SI,SF")]) + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] + "TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,DI,SF")]) + +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" ""))] + "reload_completed + && GET_CODE (operands[1]) == MEM + && constant_pool_reference_p (operands[1])" + [(set (match_dup 0) + (match_dup 1))] + "operands[1] = avoid_constant_pool_reference (operands[1]);") + + +;; %%% Kill this when call knows how to work this out. 
+(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) + (set (mem:SF (reg:SI SP_REG)) (match_dup 1))]) + +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (set (mem:SF (reg:DI SP_REG)) (match_dup 1))]) + +(define_insn "*movsf_1" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=f,m ,f,r ,m ,x,x,x ,m ,!*y,!rm,!*y") + (match_operand:SF 1 "general_operand" + "fm,f,G ,rmF,Fr,C ,x ,xm,x,rm ,*y ,*y"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], SFmode))" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + else + return "xorps\t%0, %0"; + case 6: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; + case 7: + case 8: + return "movss\t{%1, %0|%0, %1}"; + + case 9: + case 10: + return "movd\t{%1, %0|%0, %1}"; + + case 11: + return "movq\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. 
*/ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) + +(define_insn "*swapsf" + [(set (match_operand:SF 0 "fp_register_operand" "+f") + (match_operand:SF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "reload_completed || TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "SF")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "ix86_expand_move (DFmode, operands); DONE;") + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 2+2*memory operand size +;; On average, pushdf using integers can still be shorter. Allow this +;; pattern for optimize_size too. + +(define_insn "*pushdf_nointeger" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Yt"))] +;; APPLE LOCAL end 5612787 mainline sse4 + "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") + (set_attr "mode" "DF,SI,SI,DF")]) + +(define_insn "*pushdf_integer" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "push_operand" "=<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Yt"))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "DF,SI,DF")]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT && reload_completed" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) + (set (mem:DF (reg:SI SP_REG)) (match_dup 1))] + "") + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT && reload_completed" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (set (mem:DF (reg:DI SP_REG)) (match_dup 1))] + "") + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Moving is usually shorter when only FP registers are used. This separate +;; movdf pattern avoids the use of integer registers for FP operations +;; when optimizing for size.
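+;; For illustration: a DFmode copy through memory, as in
+;;
+;;	double g;
+;;	void f (double d) { g = d; }
+;;
+;; can be emitted either through the FPU (an fldl/fstpl pair) or through
+;; integer registers (two movl loads plus two movl stores); keeping the
+;; no-integer pattern below available steers register allocation toward
+;; the usually shorter FP form when optimizing for size.  (Rough sketch;
+;; exact byte counts depend on the addressing modes involved.)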
+ +(define_insn "*movdf_nointeger" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,*r ,o ,Yt*x,Yt*x,Yt*x ,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,*roF,F*r,C ,Yt*x,mYt*x,Yt*x"))] +;; APPLE LOCAL end 5612787 mainline sse4 + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. 
*/ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_insn "*movdf_integer" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,o ,Yt*x,Yt*x,Yt*x,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,roF,Fr,C ,Yt*x,m ,Yt*x"))] +;; APPLE LOCAL end 5612787 mainline sse4 + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable(); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_split + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ! 
(ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! (ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*swapdf" + [(set (match_operand:DF 0 "fp_register_operand" "+f") + (match_operand:DF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "reload_completed || TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "DF")]) + +(define_expand "movxf" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "" + "ix86_expand_move (XFmode, operands); DONE;") + +;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushxf using integer instructions is 3+3*memory operand size +;; Pushing using integer instructions is longer except for constants +;; and direct memory references. +;; (assuming that any given constant is pushed only once, but this ought to be +;; handled elsewhere). + +(define_insn "*pushxf_nointeger" + [(set (match_operand:XF 0 "push_operand" "=X,X,X") + (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] + "optimize_size" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "XF,SI,SI")]) + +(define_insn "*pushxf_integer" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] + "!optimize_size" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*") + (set_attr "mode" "XF,SI")]) + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && (GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode) + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2))) + (set (mem:XF (reg:SI SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2))) + (set (mem:XF (reg:DI SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +;; Do not use integer registers when optimizing for size +(define_insn "*movxf_nointeger" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "optimize_size + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (!
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\;fld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_insn "*movxf_integer" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] + "!optimize_size + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\;fld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_split + [(set (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && GET_MODE (operands[0]) == XFmode + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! (ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "memory_operand" ""))] + "reload_completed + && GET_CODE (operands[1]) == MEM + && (GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) + && constant_pool_reference_p (operands[1])" + [(set (match_dup 0) (match_dup 1))] +{ + rtx c = avoid_constant_pool_reference (operands[1]); + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (!standard_80387_constant_p (c)) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; + + operands[1] = c; +}) + +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_64BIT" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if 
(get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Zero extension instructions + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" +{ + if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + { + operands[1] = force_reg (HImode, operands[1]); + emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "zero_extendhisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "*zero_extendhisi2_movzwl" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "movz{wl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extendqihi2" + [(parallel + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*zero_extendqihi2_and" + [(set (match_operand:HI 0 "register_operand" "=r,?&q") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "HI")]) + +(define_insn "*zero_extendqihi2_movzbw_and" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "HI")]) + +; zero extend to SImode here to avoid partial register stalls +(define_insn "*zero_extendqihi2_movzbl" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" + "movz{bl|x}\t{%1, %k0|%k0, %k1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; For the movzbw case strip only the clobber +(define_split + [(set (match_operand:HI 0 
"register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]) + +;; When source and destination does not overlap, clear destination +;; first and then do the movb +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (match_dup 0) (const_int 0)) + (set (strict_low_part (match_dup 2)) (match_dup 1))] + "operands[2] = gen_lowpart (QImode, operands[0]);") + +;; Rest is handled by single and. +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_expand "zero_extendqisi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*zero_extendqisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r,?&q") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extendqisi2_movzbw_and" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extendqisi2_movzbw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" + "movz{bl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; For the movzbl case strip only the clobber +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_dup 0) + (zero_extend:SI (match_dup 1)))]) + +;; When source and destination does not overlap, clear destination +;; first and then do the movb +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM) + && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (match_dup 0) (const_int 0)) + (set (strict_low_part (match_dup 2)) (match_dup 1))] + "operands[2] = gen_lowpart (QImode, 
operands[0]);") + +;; Rest is handled by single and. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; %%% Kill me once multi-word ops are sane. +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] + "" + "if (!TARGET_64BIT) + { + emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); + DONE; + } + ") + +(define_insn "zero_extendsidi2_32" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,?*y,?*Yi") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm"))) + (clobber (reg:CC FLAGS_REG))] +;; APPLE LOCAL end 5612787 mainline sse4 + "!TARGET_64BIT" + "@ + # + # + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "mode" "SI,SI,SI,DI,TI") + (set_attr "type" "multi,multi,multi,mmxmov,ssemov")]) + +(define_insn "zero_extendsidi2_rex64" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*y,?*Yi") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_64BIT" + "@ + mov\t{%k1, %k0|%k0, %k1} + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,ssemov") + (set_attr "mode" "SI,DI,SI,SI")]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (zero_extend:DI (match_dup 0)))] + "TARGET_64BIT" + [(set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_64BIT" + "movz{wl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))] + "TARGET_64BIT" + "movz{bl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +;; Sign extension instructions + +(define_expand "extendsidi2" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 2 ""))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_extendsidi2_rex64 (operands[0], operands[1])); + DONE; + } +}) + 
+(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") + (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 2 "=X,X,X,&r"))] + "!TARGET_64BIT" + "#") + +(define_insn "extendsidi2_rex64" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))] + "TARGET_64BIT" + "@ + {cltq|cdqe} + movs{lq|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI") + (set_attr "prefix_0f" "0") + (set_attr "modrm" "0,1")]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_64BIT" + "movs{wq|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "TARGET_64BIT" + "movs{bq|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +;; Extend to memory case when source register does die. +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 2 "register_operand" ""))] + "(reload_completed + && dead_or_set_p (insn, operands[1]) + && !reg_mentioned_p (operands[1], operands[0]))" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 4) (match_dup 1))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +;; Extend to memory case when source register does not die. +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 2 "register_operand" ""))] + "reload_completed" + [(const_int 0)] +{ + split_di (&operands[0], 1, &operands[3], &operands[4]); + + emit_move_insn (operands[3], operands[1]); + + /* Generate a cltd if possible and doing so it profitable. */ + if (true_regnum (operands[1]) == 0 + && true_regnum (operands[2]) == 1 + && (optimize_size || TARGET_USE_CLTD)) + { + emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31))); + } + else + { + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31))); + } + emit_move_insn (operands[4], operands[2]); + DONE; +}) + +;; Extend to register case. Optimize case where source and destination +;; registers match and cases where we can use cltd. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 2 ""))] + "reload_completed" + [(const_int 0)] +{ + split_di (&operands[0], 1, &operands[3], &operands[4]); + + if (true_regnum (operands[3]) != true_regnum (operands[1])) + emit_move_insn (operands[3], operands[1]); + + /* Generate a cltd if possible and doing so it profitable. 
*/ + if (true_regnum (operands[3]) == 0 + && (optimize_size || TARGET_USE_CLTD)) + { + emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31))); + DONE; + } + + if (true_regnum (operands[4]) != true_regnum (operands[1])) + emit_move_insn (operands[4], operands[1]); + + emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31))); + DONE; +}) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=*a,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1,%0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "*extendhisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (zero_extend:DI + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))] + "TARGET_64BIT" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1,%k0|%k0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=*a,r") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cbtw|cbw}"; + default: + return "movs{bw|x}\t{%1,%0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "HI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "" + "movs{bl|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*extendqisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))] + "TARGET_64BIT" + "movs{bl|x}\t{%1,%k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; Conversions between float and double. + +;; These are all no-ops in the model used for the 80387. So just +;; emit moves. + +;; %%% Kill these when call knows how to work out a DFmode push earlier. 
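+;; For illustration: pushing a float widened to double, as for the call in
+;;
+;;	void g (double);
+;;	void f (float x) { g (x); }
+;;
+;; is handled by the splits below as an explicit stack adjustment plus a
+;; widening x87 store, roughly
+;;
+;;	subl	$8, %esp
+;;	fstpl	(%esp)
+;;
+;; (a sketch of typical -m32 output, assuming x is already in %st(0)).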
+(define_insn "*dummy_extendsfdf2" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "push_operand" "=<") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fYt")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "0" + "#") + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) + (set (mem:DF (reg:SI SP_REG)) (float_extend:DF (match_dup 1)))]) + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (set (mem:DF (reg:DI SP_REG)) (float_extend:DF (match_dup 1)))]) + +(define_insn "*dummy_extendsfxf2" + [(set (match_operand:XF 0 "push_operand" "=<") + (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))] + "0" + "#") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] + "" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2))) + (set (mem:XF (reg:SI SP_REG)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2))) + (set (mem:DF (reg:DI SP_REG)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] + "" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2))) + (set (mem:DF (reg:SI SP_REG)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2))) + (set (mem:XF (reg:DI SP_REG)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (float_extend:DF (match_operand:SF 1 "general_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. 
*/ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387) + && standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, DFmode, operands[1], SFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); + } + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*extendsfdf2_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + case 2: + return "cvtss2sd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "mode" "SF,XF,DF")]) + +(define_insn "*extendsfdf2_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" "=x") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "cvtss2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "*extendsfdf2_i387" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF,XF")]) + +(define_expand "extendsfxf2" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (float_extend:XF (match_operand:SF 1 "general_operand" "")))] + "TARGET_80387" +{ + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + if (standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, XFmode, operands[1], SFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); + } + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*extendsfxf2_i387" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") + (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. 
*/ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fstp%z0\t%y0\n\tfld%z0\t%y0"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF,XF")]) + +(define_expand "extenddfxf2" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (float_extend:XF (match_operand:DF 1 "general_operand" "")))] + "TARGET_80387" +{ + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + if (standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, XFmode, operands[1], DFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); + } + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (DFmode, operands[1]); +}) + +(define_insn "*extenddfxf2_i387" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") + (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\n\tfld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF,XF")]) + +;; %%% This seems bad bad news. +;; This cannot output into an f-reg because there is no way to be sure +;; of truncating in that case. Otherwise this is just like a simple move +;; insn. So we pretend we can output to a reg in order to get better +;; register preferencing, but we really use a stack slot. + +;; Conversion from DFmode to SFmode. 
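+;; For illustration:
+;;
+;;	float f (double d) { return d; }
+;;
+;; becomes a single cvtsd2ss when SSE2 math is in use, whereas the x87
+;; path must round through a memory temporary (an SFmode store and
+;; reload) to get correct single-precision rounding; that is why the
+;; expander below allocates a SLOT_VIRTUAL stack slot unless unsafe math
+;; optimizations permit keeping the value in a register.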
+ +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[1] = force_reg (DFmode, operands[1]); + + if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) + ; + else if (flag_unsafe_math_optimizations) + ; + else + { + rtx temp = assign_386_stack_local (SFmode, SLOT_VIRTUAL); + emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp)); + DONE; + } +}) + +(define_expand "truncdfsf2_with_temp" + [(parallel [(set (match_operand:SF 0 "" "") + (float_truncate:SF (match_operand:DF 1 "" ""))) + (clobber (match_operand:SF 2 "" ""))])] + "") + +(define_insn "*truncdfsf_fast_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,f,x") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f ,f,xm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 1: + return output_387_reg_move (insn, operands); + case 2: + return "cvtsd2ss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "mode" "SF")]) + +;; Yes, this one doesn't depend on flag_unsafe_math_optimizations, +;; because nothing we do here is unsafe. +(define_insn "*truncdfsf_fast_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:SF 0 "nonimmediate_operand" "=x") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "xm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH" + "cvtsd2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_fast_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=fm") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f")))] + "TARGET_80387 && flag_unsafe_math_optimizations" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r,Yt") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f ,f ,Ytm"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m ,X"))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_MIX_SSE_I387" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 1: + return "#"; + case 2: + return "cvtsd2ss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,multi,ssecvt") + (set_attr "unit" "*,i387,*") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m"))] + "TARGET_80387" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 1: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,multi") + (set_attr 
"unit" "*,i387") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf2_i387_1" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f")))] + "TARGET_80387 + && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !TARGET_MIX_SSE_I387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "fp_register_operand" ""))) + (clobber (match_operand 2 "" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] +{ + operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1])); +}) + +;; Conversion from XFmode to SFmode. + +(define_expand "truncxfsf2" + [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") + (float_truncate:SF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_dup 2))])] + "TARGET_80387" +{ + if (flag_unsafe_math_optimizations) + { + rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode); + emit_insn (gen_truncxfsf2_i387_noop (reg, operands[1])); + if (reg != operands[0]) + emit_move_insn (operands[0], reg); + DONE; + } + else + operands[2] = assign_386_stack_local (SFmode, SLOT_VIRTUAL); +}) + +(define_insn "*truncxfsf2_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x") + (float_truncate:SF + (match_operand:XF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] + "TARGET_MIX_SSE_I387" +{ + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") + (set_attr "mode" "SF")]) + +(define_insn "truncxfsf2_i387_noop" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387 && flag_unsafe_math_optimizations" +{ + return output_387_reg_move (insn, operands); +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_insn "*truncxfsf2_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r") + (float_truncate:SF + (match_operand:XF 1 "register_operand" "f,f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))] + "TARGET_80387" +{ + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov,multi,multi") + (set_attr "unit" "*,i387,i387") + (set_attr "mode" "SF")]) + +(define_insn "*truncxfsf2_i387_1" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:SF 2 "memory_operand" ""))] + "TARGET_80387 && reload_completed" + [(set (match_dup 2) (float_truncate:SF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:SF 0 "memory_operand" "") + (float_truncate:SF + (match_operand:XF 1 "register_operand" ""))) + 
(clobber (match_operand:SF 2 "memory_operand" ""))] + "TARGET_80387" + [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] + "") + +;; Conversion from XFmode to DFmode. + +(define_expand "truncxfdf2" + [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") + (float_truncate:DF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_dup 2))])] + "TARGET_80387" +{ + if (flag_unsafe_math_optimizations) + { + rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DFmode); + emit_insn (gen_truncxfdf2_i387_noop (reg, operands[1])); + if (reg != operands[0]) + emit_move_insn (operands[0], reg); + DONE; + } + else + operands[2] = assign_386_stack_local (DFmode, SLOT_VIRTUAL); +}) + +(define_insn "*truncxfdf2_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Yt") + (float_truncate:DF + (match_operand:XF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_MIX_SSE_I387" +{ + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") + (set_attr "mode" "DF")]) + +(define_insn "truncxfdf2_i387_noop" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387 && flag_unsafe_math_optimizations" +{ + return output_387_reg_move (insn, operands); +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF")]) + +(define_insn "*truncxfdf2_i387" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r") + (float_truncate:DF + (match_operand:XF 1 "register_operand" "f,f,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))] + "TARGET_80387" +{ + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov,multi,multi") + (set_attr "unit" "*,i387,i387") + (set_attr "mode" "DF")]) + +(define_insn "*truncxfdf2_i387_1" + [(set (match_operand:DF 0 "memory_operand" "=m") + (float_truncate:DF + (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:DF 2 "memory_operand" ""))] + "TARGET_80387 && reload_completed" + [(set (match_dup 2) (float_truncate:DF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (float_truncate:DF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:DF 2 "memory_operand" ""))] + "TARGET_80387" + [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] + "") + +;; Signed conversion to DImode. 
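+;; For illustration:
+;;
+;;	long long f (double d) { return (long long) d; }
+;;
+;; uses a single cvttsd2si{q} whenever 64-bit SSE2 arithmetic is
+;; available; otherwise the x87 patterns go through fistp, which needs
+;; the control word rewritten to force truncation (see the i387_cw
+;; attribute further down), or through fisttp on SSE3 targets, which
+;; truncates without touching the control word.  (Sketch of the code
+;; paths the expanders below select.)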
+ +(define_expand "fix_truncxfdi2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "fix_trunc<mode>di2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:SSEMODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))" +{ + if (TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); + emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) + +;; Signed conversion to SImode. + +(define_expand "fix_truncxfsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "fix_trunc<mode>si2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:SSEMODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)" +{ + if (TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (SSE_FLOAT_MODE_P (<MODE>mode)) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) + +;; Signed conversion to HImode. + +(define_expand "fix_trunc<mode>hi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:X87MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 + && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +;; APPLE LOCAL begin 4176531 +;; Unsigned conversion to SImode. + +(define_expand "fixuns_trunc<mode>si2" + [(set (match_operand:SI 0 "nonimmediate_operand" "x") + (fix:SI (match_operand:SSEMODEF 1 "register_operand" "x")))] + "!TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_SSE2 + && !optimize_size && (ix86_preferred_stack_boundary >= 128)" +{ + /* APPLE LOCAL 4424891 */ + ix86_expand_convert_uns_<MODE>2SI_sse(operands); DONE; +}) + +;; Unsigned conversion to HImode. 
+ +(define_insn "fixuns_truncdfhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r") + (fix:HI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "cvttsd2si\t{%1, %k0|%k0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fixuns_truncsfhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r") + (fix:HI (match_operand:SF 1 "register_operand" "x,xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "cvttss2si\t{%1, %k0|%k0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) +;; APPLE LOCAL end 4176531 + +;; When SSE is available, it is always faster to use it! +(define_insn "fix_truncsfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttss2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fix_truncdfdi_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Yt,Ytm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttsd2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fix_truncsfsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fix_truncdfsi_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Yt,Ytm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +;; Avoid vector decoded forms of the instruction. 
+(define_peephole2 +;; APPLE LOCAL begin 5612787 mainline sse4 + [(match_scratch:DF 2 "Yt") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))] + "TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || <MODE>mode != DImode)) + && TARGET_SSE_MATH) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); + emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0], + operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fisttp") + (set_attr "mode" "<MODE>")]) + +(define_insn "fix_trunc<mode>_i387_fisttp" + [(set (match_operand:X87MODEI 0 "memory_operand" "=m") + (fix:X87MODEI (match_operand 1 "register_operand" "f"))) + (clobber (match_scratch:XF 2 "=&1f"))] + "TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || <MODE>mode != DImode)) + && TARGET_SSE_MATH)" + "* return output_fix_trunc (insn, operands, 1);" + [(set_attr "type" "fisttp") + (set_attr "mode" "<MODE>")]) + +(define_insn "fix_trunc<mode>_i387_fisttp_with_temp" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || <MODE>mode != DImode)) + && TARGET_SSE_MATH)" + "#" + [(set_attr "type" "fisttp") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:X87MODEI 0 "register_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:X87MODEI 0 "memory_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))])] + "") + +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. Please note the clobber of FLAGS_REG. 
In i387 control +;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG +;; clobbering insns can be used. Look at emit_i387_cw_initialization () +;; function in i386.c. +(define_insn_and_split "*fix_trunc<mode>_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || <MODE>mode != DImode)) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); + emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "<MODE>")]) + +(define_insn "fix_truncdi_i387" + [(set (match_operand:DI 0 "memory_operand" "=m") + (fix:DI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_insn "fix_truncdi_i387_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fix_trunc<mode>_i387" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) 
+ (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "<MODE>")]) + +(define_insn "fix_trunc<mode>_i387_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_insn "x86_fnstcw_1" + [(set (match_operand:HI 0 "memory_operand" "=m") + (unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))] + "TARGET_80387" + "fnstcw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "HI") + (set_attr "unit" "i387")]) + +(define_insn "x86_fldcw_1" + [(set (reg:HI FPSR_REG) + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] + "TARGET_80387" + "fldcw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "HI") + (set_attr "unit" "i387") + (set_attr "athlon_decode" "vector")]) + +;; Conversion between fixed point and floating point. + +;; Even though we only accept memory inputs, the backend _really_ +;; wants to be able to do this between registers. 
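+;;
+;; With SSE math there is no 16-bit variant of cvtsi2ss/cvtsi2sd, so the
+;; HImode expanders below widen their input to SImode first.  Roughly,
+;; in C (an illustrative sketch, not code from this port):
+;;
+;;   #include <stdint.h>
+;;
+;;   static float float_hi_to_sf (int16_t n)
+;;   {
+;;     int32_t wide = n;       /* HImode -> SImode sign extension */
+;;     return (float) wide;    /* cvtsi2ss */
+;;   }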
+ +(define_expand "floathisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" +{ + if (TARGET_SSE_MATH) + { + emit_insn (gen_floatsisf2 (operands[0], + convert_to_mode (SImode, operands[1], 0))); + DONE; + } +}) + +(define_insn "*floathisf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "SF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "") + +(define_insn "*floatsisf2_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] + "TARGET_MIX_SSE_I387" + "@ + fild%z1\t%1 + # + cvtsi2ss\t{%1, %0|%0, %1} + cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "SF") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsisf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x,x") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] + "TARGET_SSE_MATH" + "cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsisf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "SF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_expand "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)" +;; APPLE LOCAL begin 6382081 +{ + if (!TARGET_64BIT) + { + rtx XFreg = gen_reg_rtx (XFmode); + rtx SFstack = assign_386_stack_local (SFmode, SLOT_VIRTUAL); + emit_insn (gen_floatdixf2 (copy_rtx (XFreg), operands[1])); + emit_insn (gen_truncxfsf2 (copy_rtx (SFstack), XFreg)); + emit_move_insn (operands[0], SFstack); + DONE; + } +}) +;; APPLE LOCAL end 6382081 + +(define_insn "*floatdisf2_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] + "TARGET_64BIT && TARGET_MIX_SSE_I387" + "@ + fild%z1\t%1 + # + cvtsi2ss{q}\t{%1, %0|%0, %1} + cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "SF") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdisf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x,x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] + "TARGET_64BIT && TARGET_SSE_MATH" + "cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdisf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] + 
"TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "SF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_expand "floathidf2" + [(set (match_operand:DF 0 "register_operand" "") + (float:DF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + if (TARGET_SSE2 && TARGET_SSE_MATH) + { + emit_insn (gen_floatsidf2 (operands[0], + convert_to_mode (SImode, operands[1], 0))); + DONE; + } +}) + +(define_insn "*floathidf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387 && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "DF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "") + +(define_insn "*floatsidf2_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_MIX_SSE_I387" + "@ + fild%z1\t%1 + # + cvtsi2sd\t{%1, %0|%0, %1} + cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "DF") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsidf2_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=x,x") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH" + "cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsidf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "DF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_expand "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)" +;; APPLE LOCAL begin 4424891 +{ + if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH) + { + ix86_expand_convert_sign_DI2DF_sse (operands); DONE; + } +}) +;; APPLE LOCAL end 4424891 + +(define_insn "*floatdidf2_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387" + "@ + fild%z1\t%1 + # + cvtsi2sd{q}\t{%1, %0|%0, %1} + cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "DF") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdidf2_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=x,x") + (float:DF 
(match_operand:DI 1 "nonimmediate_operand" "r,mr")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" + "cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdidf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "DF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "floathixf2" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "floatsixf2" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "floatdixf2" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +;; %%% Kill these when reload knows how to do it. +(define_split + [(set (match_operand 0 "fp_register_operand" "") + (float (match_operand 1 "register_operand" "")))] + "reload_completed + && TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); + operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); + ix86_free_from_memory (GET_MODE (operands[1])); + DONE; +}) + +(define_expand "floatunssisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" ""))] + "!TARGET_64BIT && TARGET_SSE_MATH" + "x86_emit_floatuns (operands); DONE;") + +;; APPLE LOCAL begin 4424891 +(define_expand "floatunssidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" ""))] + "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && (ix86_preferred_stack_boundary >= 128)" + "x86_emit_floatuns (operands); DONE;") +;; APPLE LOCAL end 4424891 + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "TARGET_64BIT && TARGET_SSE_MATH" + "x86_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + ;; APPLE LOCAL begin 4176531 + "(TARGET_64BIT || !optimize_size) && TARGET_SSE2 && TARGET_SSE_MATH + && (ix86_preferred_stack_boundary >= 128)" + ;; APPLE LOCAL end 4176531 + "x86_emit_floatuns (operands); DONE;") + +;; SSE extract/set expanders + + +;; Add instructions + +;; %%% splits for addditi3 + +(define_expand "addti3" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + 
"TARGET_64BIT" + "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;") + +(define_insn "*addti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0") + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (operands+0, 1, operands+0, operands+3); + split_ti (operands+1, 1, operands+1, operands+4); + split_ti (operands+2, 1, operands+2, operands+5);") + +;; %%% splits for addsidi3 +; [(set (match_operand:DI 0 "nonimmediate_operand" "") +; (plus:DI (match_operand:DI 1 "general_operand" "") +; (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))] + +(define_expand "adddi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;") + +(define_insn "*adddi3_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (operands+0, 1, operands+0, operands+3); + split_di (operands+1, 1, operands+1, operands+4); + split_di (operands+2, 1, operands+2, operands+5);") + +(define_insn "adddi3_carry_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") + (match_operand:DI 1 "nonimmediate_operand" "%0,0")) + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "adc{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "DI")]) + +(define_insn "*adddi3_cc_rex64" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + 
"add{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "addqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 1 "nonimmediate_operand" "%0,0")) + (match_operand:QI 2 "general_operand" "qi,qm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "adc{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI")]) + +(define_insn "addhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 1 "nonimmediate_operand" "%0,0")) + (match_operand:HI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, HImode, operands)" + "adc{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI")]) + +(define_insn "addsi3_carry" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "*addsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 1 "nonimmediate_operand" "%0")) + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "*addsi3_cc" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" + "add{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "addqi3_cc" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qi,qm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "add{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_expand "addsi3" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;") + +(define_insn "*lea_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "no_seg_address_operand" "p"))] + "!TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_1_rex64" + [(set (match_operand:SI 0 "register_operand" "=r") + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))] + "TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, 
%a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))] + "TARGET_64BIT" + "lea{l}\t{%a1, %k0|%k0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_2_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "no_seg_address_operand" "p"))] + "TARGET_64BIT" + "lea{q}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "DI")]) + +;; The lea patterns for non-Pmodes needs to be matched by several +;; insns converted to real lea by splitters. + +(define_insn_and_split "*lea_general_1" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (match_operand 1 "index_register_operand" "l") + (match_operand 2 "register_operand" "r")) + (match_operand 3 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[2]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + operands[3] = gen_lowpart (Pmode, operands[3]); + pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]), + operands[3]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 3 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_2" + [(set (match_operand 0 "register_operand" "=r") + (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "i")) + (match_operand 3 "nonmemory_operand" "ri")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]), + operands[3]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr 
"type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "nonmemory_operand" "ri"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "i")) + (match_operand 3 "register_operand" "r")) + (match_operand 4 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[3])" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); + pat = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], + operands[2]), + operands[3]), + operands[4]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (mult:SI + (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "register_operand" "r")) + (match_operand:SI 4 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (match_dup 4)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*adddi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:DI 2 "x86_64_general_operand" "rme,re,le"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. 
*/ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:DI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (plus:DI (match_dup 1) + (match_dup 2)))] + "") + +(define_insn "*adddi_2_rex64" + [(set (reg FLAGS_REG) + (compare + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rme,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, DImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* ???? We ought to handle there the 32bit case too + - do we need new constraint? */ + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + +(define_insn "*adddi_3_rex64" + [(set (reg FLAGS_REG) + (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme")) + (match_operand:DI 1 "x86_64_general_operand" "%0"))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! 
pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+	return "inc{q}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ???? We ought to handle the 32-bit case here too
+	 - do we need a new constraint?  */
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (GET_CODE (operands[2]) == CONST_INT
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+	  && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{q}\t{%2, %0|%0, %2}";
+	}
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+	(if_then_else (match_operand:DI 2 "incdec_operand" "")
+	  (const_string "incdec")
+	  (const_string "alu")))
+   (set_attr "mode" "DI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept a general immediate, because in
+; case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x8000000000000000, since that value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion is
+; valid only for comparisons not depending on it.
+(define_insn "*adddi_4_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:DI 1 "nonimmediate_operand" "0")
+		 (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+   (clobber (match_scratch:DI 0 "=rm"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCGCmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == constm1_rtx)
+	return "inc{q}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == const1_rtx);
+	  return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if ((INTVAL (operands[2]) == -128
+	   || (INTVAL (operands[2]) > 0
+	       && INTVAL (operands[2]) != 128))
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+	return "sub{q}\t{%2, %0|%0, %2}";
+      operands[2] = GEN_INT (-INTVAL (operands[2]));
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+	(if_then_else (match_operand:DI 2 "incdec_operand" "")
+	  (const_string "incdec")
+	  (const_string "alu")))
+   (set_attr "mode" "DI")])
+
+(define_insn "*adddi_5_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+		   (match_operand:DI 2 "x86_64_general_operand" "rme"))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+   && !
pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + + +(define_insn "*addsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:SI 2 "general_operand" "rmni,rni,lni"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %0|%0, %a2}"; + + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:SI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand 0 "register_operand" "") + (plus (match_operand 1 "register_operand" "") + (match_operand 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat; + /* In -fPIC mode the constructs like (const (unspec [symbol_ref])) + may confuse gen_lowpart. */ + if (GET_MODE (operands[0]) != Pmode) + { + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + } + operands[0] = gen_lowpart (SImode, operands[0]); + pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; It may seem that nonimmediate operand is proper one for operand 1. 
+;; The addsi_1 pattern allows nonimmediate operand at that place and +;; we take care in ix86_binary_operator_ok to not allow two memory +;; operands so proper swapping will be done in reload. This allow +;; patterns constructed from addsi_1 to match. +(define_insn "addsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") + (match_operand:SI 2 "general_operand" "rmni,lni")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %k0|%k0, %a2}"; + + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:SI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); +}) + +(define_insn "*addsi_2" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rmni,rni")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (plus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rmni")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_3" + [(set (reg FLAGS_REG) + (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/
+      if (GET_CODE (operands[2]) == CONST_INT
+	  && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{l}\t{%2, %0|%0, %2}";
+	}
+      return "add{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+	(if_then_else (match_operand:SI 2 "incdec_operand" "")
+	  (const_string "incdec")
+	  (const_string "alu")))
+   (set_attr "mode" "SI")])
+
+;; See the comment above addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_3_zext"
+  [(set (reg FLAGS_REG)
+	(compare (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+		 (match_operand:SI 1 "nonimmediate_operand" "%0")))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+   && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{l}\t%k0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{l}\t%k0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (GET_CODE (operands[2]) == CONST_INT
+	  && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{l}\t{%2, %k0|%k0, %2}";
+	}
+      return "add{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+	(if_then_else (match_operand:SI 2 "incdec_operand" "")
+	  (const_string "incdec")
+	  (const_string "alu")))
+   (set_attr "mode" "SI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept a general immediate, because in
+; case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x80000000, since that value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion is
+; valid only for comparisons not depending on it.
+(define_insn "*addsi_4"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SI 1 "nonimmediate_operand" "0")
+		 (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_scratch:SI 0 "=rm"))]
+  "ix86_match_ccmode (insn, CCGCmode)
+   && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == constm1_rtx)
+	return "inc{l}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == const1_rtx);
+	  return "dec{l}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.
*/ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128))) + return "sub{l}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_5" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rmni")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_expand "addhi3" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;") + +;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah +;; type optimizations enabled by define-splits. This is not important +;; for PII, and in fact harmful because of partial register stalls. + +(define_insn "*addhi_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:HI 2 "general_operand" "ri,rm,lni"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (eq_attr "alternative" "2") + (const_string "lea") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu")))) + (set_attr "mode" "HI,HI,SI")]) + +(define_insn "*addhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_2" + [(set (reg FLAGS_REG) + (compare + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmni,rni")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (plus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_3" + [(set (reg FLAGS_REG) + (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni")) + (match_operand:HI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +; See comments above addsi_4 for details. +(define_insn "*addhi_4" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (match_scratch:HI 0 "=rm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xffff) != 0x8000" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{w}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128))) + return "sub{w}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + + +(define_insn "*addhi_5" + [(set (reg FLAGS_REG) + (compare + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rmni")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_expand "addqi3" + [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
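The add patterns above all share one canonicalization in their default case: x86 sign-extends 8-bit immediates, so -128 has a one-byte encoding while +128 does not, and an add of 128 or of a negative constant (other than -128) is rewritten as the equivalent sub of the negated constant. A minimal standalone C sketch of that rule, assuming nothing beyond the standard library (fits_imm8 and prefer_sub are illustrative names, not GCC functions):

/* Sketch of the immediate canonicalization in the add patterns above.
   x86 sign-extends 8-bit immediates: -128..127 encode in one byte,
   so "add $128" is better emitted as "sub $-128".  */
#include <stdio.h>

static int fits_imm8 (long v) { return v >= -128 && v <= 127; }

/* Mirrors the pattern condition: rewrite "add $v" as "sub $-v" when
   v == 128 (only -128 fits imm8) or when v is negative but not -128
   (same size, but "sub $4" reads better than "add $-4").  */
static int prefer_sub (long v) { return v == 128 || (v < 0 && v != -128); }

int main (void)
{
  long tests[] = { 4, -4, 128, -128 };
  for (unsigned i = 0; i < sizeof tests / sizeof *tests; i++)
    {
      long v = tests[i];
      if (prefer_sub (v))
        printf ("add $%ld -> sub $%ld (imm8 now: %d)\n", v, -v, fits_imm8 (-v));
      else
        printf ("add $%ld unchanged  (imm8: %d)\n", v, fits_imm8 (v));
    }
  return 0;
}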
+(define_insn "*addqi_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r") + (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (eq_attr "alternative" "3") + (const_string "lea") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu")))) + (set_attr "mode" "QI,QI,SI,SI")]) + +(define_insn "*addqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*addqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (plus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qnm"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[1] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[1] == constm1_rtx); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
*/ + if (GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) < 0) + { + operands[1] = GEN_INT (-INTVAL (operands[1])); + return "sub{b}\t{%1, %0|%0, %1}"; + } + return "add{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 1 "incdec_operand" "") + (const_string "incdec") + (const_string "alu1"))) + (set (attr "memory") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "load") + (const_string "none"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_2" + [(set (reg FLAGS_REG) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmni,qni")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_3" + [(set (reg FLAGS_REG) + (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni")) + (match_operand:QI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +; See comments above addsi_4 for details. 
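The QImode patterns above, and *addqi_4/*addqi_5 below, accept the constant 255 wherever they accept -1, because combine can present the same QImode value either way. A quick C check of the mod-256 equivalence that justifies emitting dec for both:

/* In 8-bit arithmetic, adding 255 and subtracting 1 coincide mod 256,
   which is why the asserts above treat INTVAL == 255 like constm1.  */
#include <stdio.h>

int main (void)
{
  for (unsigned v = 0; v < 256; v++)
    if ((unsigned char) (v + 255) != (unsigned char) (v - 1))
      return 1;                         /* never taken */
  printf ("x + 255 == x - 1 (mod 256) for all 8-bit x\n");
  return 0;
}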
+(define_insn "*addqi_4" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n"))) + (clobber (match_scratch:QI 0 "=qm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xff) != 0x80" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{b}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{b}\t{%2, %0|%0, %2}"; + } + return "sub{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + + +(define_insn "*addqi_5" + [(set (reg FLAGS_REG) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qmni")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + + +(define_insn "addqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "Qmn"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + 
(if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "" + "add{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "addxf3" + [(set (match_operand:XF 0 "register_operand" "") + (plus:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "adddf3" + [(set (match_operand:DF 0 "register_operand" "") + (plus:DF (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "") + +(define_expand "addsf3" + [(set (match_operand:SF 0 "register_operand" "") + (plus:SF (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "") + +;; Subtract instructions + +;; %%% splits for subditi3 + +(define_expand "subti3" + [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;") + +(define_insn "*subti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0") + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:DI (match_dup 4) + (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (operands+0, 1, operands+0, operands+3); + split_ti (operands+1, 1, operands+1, operands+4); + split_ti (operands+2, 1, operands+2, operands+5);") + +;; %%% splits for subsidi3 + +(define_expand "subdi3" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;") + +(define_insn "*subdi3_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + 
(match_operand:DI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:SI (match_dup 4) + (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (operands+0, 1, operands+0, operands+3); + split_di (operands+1, 1, operands+1, operands+4); + split_di (operands+2, 1, operands+2, operands+5);") + +(define_insn "subdi3_carry_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sbb{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_2_rex64" + [(set (reg FLAGS_REG) + (compare + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_3_rex63" + [(set (reg FLAGS_REG) + (compare (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "subqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 2 "general_operand" "qi,qm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, QImode, operands)" + "sbb{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI")]) + +(define_insn "subhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 2 "general_operand" "ri,rm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, HImode, operands)" + "sbb{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI")]) + +(define_insn "subsi3_carry" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (plus:SI (match_operand:SI 3 
"ix86_carry_flag_operator" "") + (match_operand:SI 2 "general_operand" "ri,rm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "subsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=rm,r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 2 "general_operand" "ri,rm"))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_expand "subsi3" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;") + +(define_insn "*subsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_2" + [(set (reg FLAGS_REG) + (compare + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3_zext" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "rim"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && 
ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %1|%1, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_expand "subhi3" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;") + +(define_insn "*subhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_2" + [(set (reg FLAGS_REG) + (compare + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "ri,rm")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_3" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "ri,rm"))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "subqi3" + [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;") + +(define_insn "*subqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_2" + [(set (reg FLAGS_REG) + (compare + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qi,qm")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_3" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qi,qm"))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "subxf3" + [(set (match_operand:XF 0 "register_operand" "") + (minus:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "subdf3" + [(set (match_operand:DF 0 "register_operand" "") + (minus:DF (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "") + +(define_expand "subsf3" + [(set (match_operand:SF 0 "register_operand" "") + (minus:SF (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "") + +;; Multiply instructions + +(define_expand "muldi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*muldi3_1_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:DI 2 "x86_64_general_operand" "K,e,mr"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "@ + imul{q}\t{%2, %1, %0|%0, %1, %2} + imul{q}\t{%2, %1, %0|%0, %1, %2} + imul{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "DI")]) + +(define_expand "mulsi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*mulsi3_1" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SI 2 "general_operand" "K,i,mr"))) + (clobber (reg:CC FLAGS_REG))] + "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + "@ + imul{l}\t{%2, %1, %0|%0, %1, %2} + imul{l}\t{%2, %1, %0|%0, %1, %2} +
imul{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "SI")]) + +(define_insn "*mulsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SI 2 "general_operand" "K,i,mr")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "@ + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "SI")]) + +(define_expand "mulhi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "") + +(define_insn "*mulhi3_1" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:HI 2 "general_operand" "K,i,mr"))) + (clobber (reg:CC FLAGS_REG))] + "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + "@ + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1,2") + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "HI")]) + +(define_expand "mulqi3" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "") + +(define_insn "*mulqi3_1" + [(set (match_operand:QI 0 "register_operand" "=a") + (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "mode" "QI")]) + +(define_expand "umulqihi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (zero_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "")) + (zero_extend:HI + (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "") + +(define_insn "*umulqihi3_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (mult:HI (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0")) + (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && (GET_CODE (operands[1]) != 
MEM || GET_CODE (operands[2]) != MEM)" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "mode" "QI")]) + +(define_expand "mulqihi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "") + +(define_insn "*mulqihi3_insn" + [(set (match_operand:HI 0 "register_operand" "=a") + (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0")) + (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "imul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "mode" "QI")]) + +(define_expand "umulditi3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*umulditi3_insn" + [(set (match_operand:TI 0 "register_operand" "=A") + (mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0")) + (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "DI")]) + +;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers +(define_expand "umulsidi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT" + "") + +(define_insn "*umulsidi3_insn" + [(set (match_operand:DI 0 "register_operand" "=A") + (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0")) + (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "SI")]) + +(define_expand "mulditi3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (sign_extend:TI + (match_operand:DI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*mulditi3_insn" + [(set (match_operand:TI 0 "register_operand" "=A") + (mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0")) + (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 
FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "imul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "DI")]) + +(define_expand "mulsidi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT" + "") + +(define_insn "*mulsidi3_insn" + [(set (match_operand:DI 0 "register_operand" "=A") + (mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0")) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "SI")]) + +(define_expand "umuldi3_highpart" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" ""))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*umuldi3_highpart_rex64" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "%a")) + (zero_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "DI")]) + +(define_expand "umulsi3_highpart" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*umulsi3_highpart_insn" + [(set (match_operand:SI 0 "register_operand" "=d") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "SI")]) + +(define_insn "*umulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (zero_extend:DI + 
(match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "SI")]) + +(define_expand "smuldi3_highpart" + [(parallel [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (sign_extend:TI + (match_operand:DI 2 "register_operand" ""))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*smuldi3_highpart_rex64" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "%a")) + (sign_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "imul{q}\t%2" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "DI")]) + +(define_expand "smulsi3_highpart" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*smulsi3_highpart_insn" + [(set (match_operand:SI 0 "register_operand" "=d") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (sign_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "SI")]) + +(define_insn "*smulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (sign_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "mode" "SI")]) + +;; The patterns that match these are at the end of this file. 
+ +(define_expand "mulxf3" + [(set (match_operand:XF 0 "register_operand" "") + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "muldf3" + [(set (match_operand:DF 0 "register_operand" "") + (mult:DF (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "") + +(define_expand "mulsf3" + [(set (match_operand:SF 0 "register_operand" "") + (mult:SF (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "") + +;; Divide instructions + +(define_insn "divqi3" + [(set (match_operand:QI 0 "register_operand" "=a") + (div:QI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "idiv{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +(define_insn "udivqi3" + [(set (match_operand:QI 0 "register_operand" "=a") + (udiv:QI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "divxf3" + [(set (match_operand:XF 0 "register_operand" "") + (div:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "divdf3" + [(set (match_operand:DF 0 "register_operand" "") + (div:DF (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "") + +(define_expand "divsf3" + [(set (match_operand:SF 0 "register_operand" "") + (div:SF (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "") + +;; Remainder instructions. + +(define_expand "divmoddi4" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (mod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +;; Allow to come the parameter in eax or edx to avoid extra moves. +;; Penalize eax case slightly because it results in worse scheduling +;; of code. 
+(define_insn "*divmoddi4_nocltd_rex64" + [(set (match_operand:DI 0 "register_operand" "=&a,?a") + (div:DI (match_operand:DI 2 "register_operand" "1,0") + (match_operand:DI 3 "nonimmediate_operand" "rm,rm"))) + (set (match_operand:DI 1 "register_operand" "=&d,&d") + (mod:DI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && !optimize_size && !TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmoddi4_cltd_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (div:DI (match_operand:DI 2 "register_operand" "a") + (match_operand:DI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 1 "register_operand" "=&d") + (mod:DI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (optimize_size || TARGET_USE_CLTD)" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmoddi_noext_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (div:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=d") + (mod:DI (match_dup 1) (match_dup 2))) + (use (match_operand:DI 4 "register_operand" "3")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "idiv{q}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (mod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (match_dup 3) + (ashiftrt:DI (match_dup 4) (const_int 63))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:DI (reg:DI 0) (match_dup 2))) + (set (match_dup 3) + (mod:DI (reg:DI 0) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] +{ + /* Avoid use of cltd in favor of a mov+shift. */ + if (!TARGET_USE_CLTD && !optimize_size) + { + if (true_regnum (operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_move_insn (operands[3], operands[1]); + operands[4] = operands[3]; + } + else + { + gcc_assert (!true_regnum (operands[1])); + operands[4] = operands[1]; + } +}) + + +(define_expand "divmodsi4" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +;; Allow to come the parameter in eax or edx to avoid extra moves. +;; Penalize eax case slightly because it results in worse scheduling +;; of code. 
+(define_insn "*divmodsi4_nocltd" + [(set (match_operand:SI 0 "register_operand" "=&a,?a") + (div:SI (match_operand:SI 2 "register_operand" "1,0") + (match_operand:SI 3 "nonimmediate_operand" "rm,rm"))) + (set (match_operand:SI 1 "register_operand" "=&d,&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "!optimize_size && !TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmodsi4_cltd" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "optimize_size || TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmodsi_noext" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 3 "register_operand" "=d") + (mod:SI (match_dup 1) (match_dup 2))) + (use (match_operand:SI 4 "register_operand" "3")) + (clobber (reg:CC FLAGS_REG))] + "" + "idiv{l}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 3) + (ashiftrt:SI (match_dup 4) (const_int 31))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:SI (reg:SI 0) (match_dup 2))) + (set (match_dup 3) + (mod:SI (reg:SI 0) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] +{ + /* Avoid use of cltd in favor of a mov+shift. */ + if (!TARGET_USE_CLTD && !optimize_size) + { + if (true_regnum (operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_move_insn (operands[3], operands[1]); + operands[4] = operands[3]; + } + else + { + gcc_assert (!true_regnum (operands[1])); + operands[4] = operands[1]; + } +}) +;; %%% Split me. 
+(define_insn "divmodhi4" + [(set (match_operand:HI 0 "register_operand" "=a") + (div:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:HI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:HI 3 "register_operand" "=&d") + (mod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "cwtd\;idiv{w}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "udivmoddi4" + [(set (match_operand:DI 0 "register_operand" "=a") + (udiv:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=&d") + (umod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "xor{q}\t%3, %3\;div{q}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "DI")]) + +(define_insn "*udivmoddi4_noext" + [(set (match_operand:DI 0 "register_operand" "=a") + (udiv:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=d") + (umod:DI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "div{q}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (udiv:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (umod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:DI (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (umod:DI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "udivmodsi4" + [(set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 3 "register_operand" "=&d") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "" + "xor{l}\t%3, %3\;div{l}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "*udivmodsi4_noext" + [(set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 3 "register_operand" "=d") + (umod:SI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))] + "" + "div{l}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 3) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (umod:SI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_expand "udivmodhi4" + [(set (match_dup 4) (const_int 0)) + (parallel [(set (match_operand:HI 0 "register_operand" "") + (udiv:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "nonimmediate_operand" ""))) + 
(set (match_operand:HI 3 "register_operand" "") + (umod:HI (match_dup 1) (match_dup 2))) + (use (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "operands[4] = gen_reg_rtx (HImode);") + +(define_insn "*udivmodhi_noext" + [(set (match_operand:HI 0 "register_operand" "=a") + (udiv:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:HI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:HI 3 "register_operand" "=d") + (umod:HI (match_dup 1) (match_dup 2))) + (use (match_operand:HI 4 "register_operand" "3")) + (clobber (reg:CC FLAGS_REG))] + "" + "div{w}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "HI")]) + +;; We cannot use div/idiv for double division, because it causes +;; "division by zero" on the overflow and that's not what we expect +;; from truncate. Because true (non truncating) double division is +;; never generated, we can't create this insn anyway. +; +;(define_insn "" +; [(set (match_operand:SI 0 "register_operand" "=a") +; (truncate:SI +; (udiv:DI (match_operand:DI 1 "register_operand" "A") +; (zero_extend:DI +; (match_operand:SI 2 "nonimmediate_operand" "rm"))))) +; (set (match_operand:SI 3 "register_operand" "=d") +; (truncate:SI +; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2))))) +; (clobber (reg:CC FLAGS_REG))] +; "" +; "div{l}\t{%2, %0|%0, %2}" +; [(set_attr "type" "idiv")]) + +;;- Logical AND instructions + +;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al. +;; Note that this excludes ah. + +(define_insn "*testdi_1_rex64" + [(set (reg FLAGS_REG) + (compare + (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm") + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + test{l}\t{%k1, %k0|%k0, %k1} + test{l}\t{%k1, %k0|%k0, %k1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,0,1,1") + (set_attr "mode" "SI,SI,DI,DI,DI") + (set_attr "pent_pair" "uv,np,uv,np,uv")]) + +(define_insn "testsi_1" + [(set (reg FLAGS_REG) + (compare + (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:SI 1 "general_operand" "in,in,rin")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "test{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testsi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0)))] + "" + "") + +(define_insn "*testhi_1" + [(set (reg FLAGS_REG) + (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:HI 1 "general_operand" "n,n,rn")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "test{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "HI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ccz_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "nonmemory_operand" "")) + (const_int 0)))] + "" + "") + +(define_insn "*testqi_1_maybe_si" + [(set (reg FLAGS_REG) + (compare 
+ (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r") + (match_operand:QI 1 "general_operand" "n,n,qn,n")) + (const_int 0)))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, + GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)" +{ + if (which_alternative == 3) + { + if (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) < 0) + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "test{l}\t{%1, %k0|%k0, %1}"; + } + return "test{b}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1,1") + (set_attr "mode" "QI,QI,QI,SI") + (set_attr "pent_pair" "uv,np,uv,np")]) + +(define_insn "*testqi_1" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm") + (match_operand:QI 1 "general_operand" "n,n,qn")) + (const_int 0)))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "QI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ext_ccno_0" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "" + "") + +(define_insn "*testqi_ext_0" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI") + (set_attr "length_immediate" "1") + (set_attr "pent_pair" "np")]) + +(define_insn "*testqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "general_operand" "Qm"))) + (const_int 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "register_operand" "Q"))) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8))) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +;; Combine likes to form bit extractions for some tests. Humor it. 
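+;; For instance (an illustrative case, not taken from compiler output):
+;; a source test such as "if (x & 0x700)" may reach us as
+;;   (zero_extract:SI x (const_int 3) (const_int 8))
+;; compared against zero.  The patterns below accept that form, and the
+;; splitter rewrites it back into an AND with the equivalent mask
+;; (((1 << len) - 1) << pos), here 0x700.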
+(define_insn "*testqi_ext_3" + [(set (reg FLAGS_REG) + (compare (zero_extract:SI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + && (GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode) + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") + +(define_insn "*testqi_ext_3_rex64" + [(set (reg FLAGS_REG) + (compare (zero_extract:DI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:DI 1 "const_int_operand" "") + (match_operand:DI 2 "const_int_operand" "")) + (const_int 0)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + /* Ensure that resulting mask is zero or sign extended operand. */ + && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 + && INTVAL (operands[1]) > 32)) + && (GET_MODE (operands[0]) == SImode + || GET_MODE (operands[0]) == DImode + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(zero_extract + (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)]))] + "ix86_match_ccmode (insn, CCNOmode)" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))] +{ + rtx val = operands[2]; + HOST_WIDE_INT len = INTVAL (operands[3]); + HOST_WIDE_INT pos = INTVAL (operands[4]); + HOST_WIDE_INT mask; + enum machine_mode mode, submode; + + mode = GET_MODE (val); + if (GET_CODE (val) == MEM) + { + /* ??? Combine likes to put non-volatile mem extractions in QImode + no matter the size of the test. So find a mode that works. */ + if (! MEM_VOLATILE_P (val)) + { + mode = smallest_mode_for_size (pos + len, MODE_INT); + val = adjust_address (val, mode, 0); + } + } + else if (GET_CODE (val) == SUBREG + && (submode = GET_MODE (SUBREG_REG (val)), + GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) + && pos + len <= GET_MODE_BITSIZE (submode)) + { + /* Narrow a paradoxical subreg to prevent partial register stalls. */ + mode = submode; + val = SUBREG_REG (val); + } + else if (mode == HImode && pos + len <= 8) + { + /* Small HImode tests can be converted to QImode. */ + mode = QImode; + val = gen_lowpart (QImode, val); + } + + if (len == HOST_BITS_PER_WIDE_INT) + mask = -1; + else + mask = ((HOST_WIDE_INT)1 << len) - 1; + mask <<= pos; + + operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode)); +}) + +;; Convert HImode/SImode test instructions with immediate to QImode ones. +;; i386 does not allow to encode test with 8bit sign extended immediate, so +;; this is relatively important trick. +;; Do the conversion only post-reload to avoid limiting of the register class +;; to QI regs. 
+(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "reload_completed + && QI_REG_P (operands[2]) + && GET_MODE (operands[2]) != QImode + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~(255 << 8))) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~(127 << 8))))" + [(set (match_dup 0) + (match_op_dup 1 + [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) + (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);") + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "reload_completed + && GET_MODE (operands[2]) != QImode + && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2])) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~255)) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~127)))" + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (QImode, operands[2]); + operands[3] = gen_lowpart (QImode, operands[3]);") + + +;; %%% This used to optimize known byte-wide and operations to memory, +;; and sometimes to QImode registers. If this is considered useful, +;; it should be done with splitters. + +(define_expand "anddi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (and:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_szext_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "ix86_expand_binary_operator (AND, DImode, operands); DONE;") + +(define_insn "*anddi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; + + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bq|x}\t{%1,%0|%0, %1}"; + else + return "movz{wq|x}\t{%1,%0|%0, %1}"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "and{l}\t{%k2, %k0|%k0, %k2}"; + else + return "and{q}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,alu,imovx") + (set_attr "length_immediate" "*,*,*,0") + (set_attr "mode" "SI,DI,DI,DI")]) + +(define_insn "*anddi_2" + [(set (reg FLAGS_REG) + (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm") + (and:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, DImode, operands)" + "@ + and{l}\t{%k2, %k0|%k0, %k2} + and{q}\t{%2, %0|%0, %2} + and{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + 
(set_attr "mode" "SI,DI,DI")]) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (AND, SImode, operands); DONE;") + +(define_insn "*andsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:SI 2 "general_operand" "ri,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; + + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bl|x}\t{%1,%0|%0, %1}"; + else + return "movz{wl|x}\t{%1,%0|%0, %1}"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "and{l}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_dup 0) + (const_int -65536))) + (clobber (reg:CC FLAGS_REG))] + "optimize_size || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (HImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -256))) + (clobber (reg:CC FLAGS_REG))] + "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (QImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -65281))) + (clobber (reg:CC FLAGS_REG))] + "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed" + [(parallel [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]);") + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*andsi_2" + [(set (reg FLAGS_REG) + (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (and:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_2_zext" + [(set (reg FLAGS_REG) + (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + 
(match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "andhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (AND, HImode, operands); DONE;") + +(define_insn "*andhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:HI 2 "general_operand" "ri,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + gcc_assert (INTVAL (operands[2]) == 0xff); + return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + return "and{w}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "HI,HI,SI")]) + +(define_insn "*andhi_2" + [(set (reg FLAGS_REG) + (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (and:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, HImode, operands)" + "and{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "andqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (AND, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*andqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qi,qmi,ri"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, QImode, operands)" + "@ + and{b}\t{%2, %0|%0, %2} + and{b}\t{%2, %0|%0, %2} + and{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*andqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qi,qmi"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_2_maybe_si" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qim,qi,i")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (AND, QImode, operands) + && ix86_match_ccmode (insn, + GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) >= 0 ? 
CCNOmode : CCZmode)" +{ + if (which_alternative == 2) + { + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); + return "and{l}\t{%2, %k0|%k0, %2}"; + } + return "and{b}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*andqi_2" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qim,qi")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, QImode, operands)" + "and{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_2_slp" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "nonimmediate_operand" "qmi,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (and:QI (match_dup 0) (match_dup 1)))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +;; ??? A bug in recog prevents it from recognizing a const_int as an +;; operand to zero_extend in andqi_ext_1. It was checking explicitly +;; for a QImode operand, which of course failed. + +(define_insn "andqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +;; Generated by peephole translating test to and. This shows up +;; often in fp comparisons. 
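+;; E.g. after an i387 compare, "fnstsw %ax" leaves the C0/C2/C3
+;; condition bits in %ah; once the old register value is dead, the
+;; peephole turns the subsequent "test $0x45, %ah" into
+;; "and $0x45, %ah", which is the form matched below.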
+ +(define_insn "*andqi_ext_0_cc" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode)" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "" + "and{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +;; Convert wide AND instructions with immediate operand to shorter QImode +;; equivalents when possible. +;; Don't do the splitting with memory operands, since it introduces risk +;; of memory mismatch stalls. We may want to do the splitting for optimizing +;; for size, but that can (should?) be handled by generic code instead. +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (and:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since AND can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is not set. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(~INTVAL (operands[2]) & ~255) + && !(INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (and:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") + +;; Logical inclusive OR instructions + +;; %%% This used to optimize known byte-wide and operations to memory. +;; If this is considered useful, it should be done with splitters. + +(define_expand "iordi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (ior:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "ix86_expand_binary_operator (IOR, DImode, operands); DONE;") + +(define_insn "*iordi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rme"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*iordi_2_rex64" + [(set (reg FLAGS_REG) + (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (ior:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*iordi_3_rex64" + [(set (reg FLAGS_REG) + (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "x86_64_general_operand" "rem")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (IOR, SImode, operands); DONE;") + +(define_insn "*iorsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rmi"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*iorsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=rm") + (zero_extend:DI + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + 
[(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_1_zext_imm" + [(set (match_operand:DI 0 "register_operand" "=rm") + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_2" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (ior:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; ??? Special case for immediate operand is missing - it is tricky. +(define_insn "*iorsi_2_zext" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_2_zext_imm" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand 2 "x86_64_zext_immediate_operand" "Z")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_3" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "iorhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ior:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (IOR, HImode, operands); DONE;") + +(define_insn "*iorhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") + (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmi,ri"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (IOR, HImode, operands)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*iorhi_2" + [(set (reg FLAGS_REG) + (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (ior:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, HImode, operands)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + 
+(define_insn "*iorhi_3" + [(set (reg FLAGS_REG) + (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "iorqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ior:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (IOR, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*iorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") + (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmi,qi,ri"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (IOR, QImode, operands)" + "@ + or{b}\t{%2, %0|%0, %2} + or{b}\t{%2, %0|%0, %2} + or{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*iorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m")) + (ior:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qmi,qi"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "or{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_2" + [(set (reg FLAGS_REG) + (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qim,qi")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (ior:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, QImode, operands)" + "or{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_2_slp" + [(set (reg FLAGS_REG) + (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qim,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 0) (match_dup 1)))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size)
+   && ix86_match_ccmode (insn, CCNOmode)
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "or{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_3"
+  [(set (reg FLAGS_REG)
+        (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+                         (match_operand:QI 2 "general_operand" "qim"))
+                 (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+  "or{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "iorqi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (ior:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "or{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (ior:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extend:SI
+            (match_operand:QI 2 "general_operand" "Qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "or{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (ior:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extend:SI
+            (match_operand 2 "ext_register_operand" "Q"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "or{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (ior:SI
+          (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+                           (const_int 8)
+                           (const_int 8))
+          (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+                           (const_int 8)
+                           (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "or{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+        (ior (match_operand 1 "register_operand" "")
+             (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && QI_REG_P (operands[0])
+   && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+   && !(INTVAL (operands[2]) & ~(255 << 8))
+   && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+                   (ior:SI (zero_extract:SI (match_dup 1)
+                                            (const_int 8) (const_int 8))
+                           (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);
+   operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is set.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+        (ior (match_operand 1 "general_operand" "")
+             (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && ANY_QI_REG_P (operands[0])
+   && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+   && !(INTVAL (operands[2]) & ~255)
+   && (INTVAL (operands[2]) & 128)
+   && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+                   (ior:QI (match_dup 1)
+                           (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (QImode, operands[0]);
+   operands[1] = gen_lowpart (QImode, operands[1]);
+   operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Logical XOR instructions
+
+;; %%% This used to optimize known byte-wide and operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "xordi3"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+        (xor:DI (match_operand:DI 1 "nonimmediate_operand" "")
+                (match_operand:DI 2 "x86_64_general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (XOR, DImode, operands); DONE;")
+
+(define_insn "*xordi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+        (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+                (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (XOR, DImode, operands)"
+  "@
+   xor{q}\t{%2, %0|%0, %2}
+   xor{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI,DI")])
+
+(define_insn "*xordi_2_rex64"
+  [(set (reg FLAGS_REG)
+        (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+                         (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+        (xor:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, DImode, operands)"
+  "@
+   xor{q}\t{%2, %0|%0, %2}
+   xor{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI,DI")])
+
+(define_insn "*xordi_3_rex64"
+  [(set (reg FLAGS_REG)
+        (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+                         (match_operand:DI 2 "x86_64_general_operand" "rem"))
+                 (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, DImode, operands)"
+  "xor{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+        (xor:SI (match_operand:SI 1 "nonimmediate_operand" "")
+                (match_operand:SI 2 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "ix86_expand_binary_operator (XOR, SImode, operands); DONE;")
+
+(define_insn "*xorsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+        (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+                (match_operand:SI 2 "general_operand" "ri,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+;; Add a special case for immediates
+(define_insn "*xorsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+                  (match_operand:SI 2
"general_operand" "rim")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_1_zext_imm" + [(set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_2" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (xor:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; ??? Special case for immediate operand is missing - it is tricky. +(define_insn "*xorsi_2_zext" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_2_zext_imm" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand 2 "x86_64_zext_immediate_operand" "Z")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_3" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "xorhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (xor:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (XOR, HImode, operands); DONE;") + +(define_insn "*xorhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") + (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmi,ri"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, HImode, operands)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*xorhi_2" + [(set (reg FLAGS_REG) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + 
(xor:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, HImode, operands)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*xorhi_3" + [(set (reg FLAGS_REG) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "xorqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (XOR, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*xorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmi,qi,ri"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, QImode, operands)" + "@ + xor{b}\t{%2, %0|%0, %2} + xor{b}\t{%2, %0|%0, %2} + xor{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*xorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (xor:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qi,qmi"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "xorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + 
(xor:SI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_1" + [(set (reg FLAGS_REG) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qim,qi")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (xor:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, QImode, operands)" + "xor{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_2_slp" + [(set (reg FLAGS_REG) + (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qim,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 0) (match_dup 1)))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_2" + [(set (reg FLAGS_REG) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qim")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_ext_1" + [(set (reg FLAGS_REG) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "qmn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_expand "xorqi_cc_ext_1" + [(parallel [ + (set (reg:CCNO FLAGS_REG) + (compare:CCNO + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))])] + "" + "") + +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "register_operand" 
"") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (xor:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since XOR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") + +;; Negation instructions + +(define_expand "negti2" + [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "ix86_expand_unary_operator (NEG, TImode, operands); DONE;") + +(define_insn "*negti2_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=ro") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_unary_operator_ok (NEG, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DI (match_dup 2)) (const_int 0))) + (set (match_dup 0) (neg:DI (match_dup 2)))]) + (parallel + [(set (match_dup 1) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 1) + (neg:DI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (operands+1, 1, operands+2, operands+3); + split_ti (operands+0, 1, operands+0, operands+1);") + +(define_expand "negdi2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_expand_unary_operator (NEG, DImode, operands); DONE;") + +(define_insn "*negdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=ro") + (neg:DI (match_operand:DI 1 "general_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && ix86_unary_operator_ok (NEG, DImode, operands)" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SI (match_dup 2)) (const_int 0))) + (set (match_dup 0) (neg:SI (match_dup 2)))]) + (parallel + [(set 
(match_dup 1) + (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 1) + (neg:SI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (operands+1, 1, operands+2, operands+3); + split_di (operands+0, 1, operands+0, operands+1);") + +(define_insn "*negdi2_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)" + "neg{q}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "DI")]) + +;; The problem with neg is that it does not perform (compare x 0), +;; it really performs (compare 0 x), which leaves us with the zero +;; flag being the only useful item. + +(define_insn "*negdi2_cmpz_rex64" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (neg:DI (match_dup 1)))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)" + "neg{q}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "DI")]) + + +(define_expand "negsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (neg:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_expand_unary_operator (NEG, SImode, operands); DONE;") + +(define_insn "*negsi2_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; Combine is quite creative about this pattern. +(define_insn "*negsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; The problem with neg is that it does not perform (compare x 0), +;; it really performs (compare 0 x), which leaves us with the zero +;; flag being the only useful item. 
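+;; Concretely: "neg %eax" computes 0 - %eax and sets the flags from
+;; that subtraction, so CF is set whenever the operand was nonzero and
+;; SF/OF describe 0 - x rather than x itself.  Only ZF agrees with a
+;; compare of the operand (or the result) against zero, hence the
+;; CCZ-only compare-and-neg patterns below.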
+ +(define_insn "*negsi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (neg:SI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*negsi2_cmpz_zext" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (lshiftrt:DI + (neg:DI (ashift:DI + (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1) + (const_int 32))) + (const_int 32)))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_expand "neghi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "ix86_expand_unary_operator (NEG, HImode, operands); DONE;") + +(define_insn "*neghi2_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, HImode, operands)" + "neg{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_insn "*neghi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (neg:HI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, HImode, operands)" + "neg{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_expand "negqi2" + [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "ix86_expand_unary_operator (NEG, QImode, operands); DONE;") + +(define_insn "*negqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + +(define_insn "*negqi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + +;; Changing of sign for FP values is doable using integer unit too. 
+ +(define_expand "negsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_fp_absneg_operator (NEG, SFmode, operands); DONE;") + +(define_expand "abssf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;") + +(define_insn "*absnegsf2_mixed" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x ,x,f,rm") + (match_operator:SF 3 "absneg_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0,0 ")])) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0,X,X ")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE_MATH && TARGET_MIX_SSE_I387 + && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" + "#") + +(define_insn "*absnegsf2_sse" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,rm") + (match_operator:SF 3 "absneg_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")])) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,X")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE_MATH + && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" + "#") + +(define_insn "*absnegsf2_i387" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,rm") + (match_operator:SF 3 "absneg_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0,0")])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !TARGET_SSE_MATH + && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" + "#") + +(define_expand "copysignsf3" + [(match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "nonmemory_operand" "") + (match_operand:SF 2 "register_operand" "")] + "TARGET_SSE_MATH" +{ + ix86_expand_copysign (operands); + DONE; +}) + +(define_insn_and_split "copysignsf3_const" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF + [(match_operand:V4SF 1 "vector_move_operand" "xmC") + (match_operand:SF 2 "register_operand" "0") + (match_operand:V4SF 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "TARGET_SSE_MATH" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) + +(define_insn "copysignsf3_var" + [(set (match_operand:SF 0 "register_operand" "=x, x, x, x,x") + (unspec:SF + [(match_operand:SF 2 "register_operand" " x, 0, 0, x,x") + (match_operand:SF 3 "register_operand" " 1, 1, x, 1,x") + (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0") + (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V4SF 1 "=x, x, x, x,x"))] + "TARGET_SSE_MATH" + "#") + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF + [(match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "register_operand" "") + (match_operand:V4SF 4 "" "") + (match_operand:V4SF 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V4SF 1 ""))] + "TARGET_SSE_MATH && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (NEG, DFmode, operands); DONE;") + +(define_expand "absdf2" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (abs:DF (match_operand:DF 
1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;") + +(define_insn "*absnegdf2_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,f,rm") + (match_operator:DF 3 "absneg_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0,0")])) + (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X,X")) + (clobber (reg:CC FLAGS_REG))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 + && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "#") + +(define_insn "*absnegdf2_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,rm") + (match_operator:DF 3 "absneg_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0 ")])) + (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X ")) + (clobber (reg:CC FLAGS_REG))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH + && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "#") + +(define_insn "*absnegdf2_i387" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm") + (match_operator:DF 3 "absneg_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0,0")])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) + && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "#") + +(define_expand "copysigndf3" + [(match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "nonmemory_operand" "") + (match_operand:DF 2 "register_operand" "")] + "TARGET_SSE2 && TARGET_SSE_MATH" +{ + ix86_expand_copysign (operands); + DONE; +}) + +(define_insn_and_split "copysigndf3_const" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF + [(match_operand:V2DF 1 "vector_move_operand" "xmC") + (match_operand:DF 2 "register_operand" "0") + (match_operand:V2DF 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) + +(define_insn "copysigndf3_var" + [(set (match_operand:DF 0 "register_operand" "=x, x, x, x,x") + (unspec:DF + [(match_operand:DF 2 "register_operand" " x, 0, 0, x,x") + (match_operand:DF 3 "register_operand" " 1, 1, x, 1,x") + (match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0") + (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V2DF 1 "=x, x, x, x,x"))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "#") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF + [(match_operand:DF 2 "register_operand" "") + (match_operand:DF 3 "register_operand" "") + (match_operand:V2DF 4 "" "") + (match_operand:V2DF 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V2DF 1 ""))] + "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) + +(define_expand "negxf2" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))] + "TARGET_80387" + "ix86_expand_fp_absneg_operator (NEG, XFmode, operands); DONE;") + +(define_expand "absxf2" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))] + "TARGET_80387" + "ix86_expand_fp_absneg_operator (ABS, XFmode, 
operands); DONE;") + +(define_insn "*absnegxf2_i387" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm") + (match_operator:XF 3 "absneg_operator" + [(match_operand:XF 1 "nonimmediate_operand" "0,0")])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 + && ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)" + "#") + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_expand "negtf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (neg:TF (match_operand:TF 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_fp_absneg_operator (NEG, TFmode, operands); DONE;") + +(define_expand "abstf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (abs:TF (match_operand:TF 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_fp_absneg_operator (ABS, TFmode, operands); DONE;") + +(define_insn "*absnegtf2_sse" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x,m") + (match_operator:TF 3 "absneg_operator" + [(match_operand:TF 1 "nonimmediate_operand" "0, x,0")])) + (use (match_operand:TF 2 "nonimmediate_operand" "xm,0,X")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_unary_operator_ok (GET_CODE (operands[3]), TFmode, operands)" + "#") +;; APPLE LOCAL end 5612787 mainline sse4 + +;; Splitters for fp abs and neg. + +(define_split + [(set (match_operand 0 "fp_register_operand" "") + (match_operator 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "absneg_operator" + [(match_operand 1 "register_operand" "")])) + (use (match_operand 2 "nonimmediate_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) (match_dup 3))] +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum machine_mode vmode = GET_MODE (operands[2]); + rtx tmp; + + operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0); + operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0); + if (operands_match_p (operands[0], operands[2])) + { + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } + if (GET_CODE (operands[3]) == ABS) + tmp = gen_rtx_AND (vmode, operands[1], operands[2]); + else + tmp = gen_rtx_XOR (vmode, operands[1], operands[2]); + operands[3] = tmp; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand:V4SF 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_lowpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + if (TARGET_64BIT) + { + tmp = gen_lowpart (DImode, operands[0]); + tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63)); + 
operands[0] = tmp; + + if (GET_CODE (operands[1]) == ABS) + tmp = const0_rtx; + else + tmp = gen_rtx_NOT (DImode, tmp); + } + else + { + operands[0] = gen_highpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + + (TARGET_64BIT ? 1 : 2)); + if (GET_CODE (operands[1]) == ABS) + { + tmp = GEN_INT (0x7fff); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = GEN_INT (0x8000); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand 0 "memory_operand" "") + (match_operator 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + enum machine_mode mode = GET_MODE (operands[0]); + int size = mode == XFmode ? 10 : GET_MODE_SIZE (mode); + rtx tmp; + + /* APPLE LOCAL begin radar 4117515 */ + if (size == 4) + { + operands[0] = adjust_address (operands[0], SImode, 0); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + } + else + { + operands[0] = adjust_address (operands[0], QImode, size - 1); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7f, QImode); + tmp = gen_rtx_AND (QImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80, QImode); + tmp = gen_rtx_XOR (QImode, operands[0], tmp); + } + } + /* APPLE LOCAL end radar 4117515 */ + operands[1] = tmp; +}) + +;; Conditionalize these after reload. If they match before reload, we +;; lose the clobber and ability to use integer instructions. 
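To make the memory split above concrete: only the byte that actually holds the sign is rewritten (offset size-1 on little-endian x86, with XFmode counted as 10 bytes). A hedged C sketch of the same trick on a double stored in memory (illustrative only):

#include <stdint.h>

static void negate_double_in_place (double *p)
{
  /* Little-endian: the IEEE sign bit lives in the highest-addressed byte.
     Byte-wise access via uint8_t* keeps the aliasing legal.  */
  uint8_t *sign_byte = (uint8_t *) p + sizeof (double) - 1;
  *sign_byte ^= 0x80;   /* neg; an abs would use  *sign_byte &= 0x7f  */
}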
+ +(define_insn "*negsf2_1" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "0")))] + "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "SF")]) + +(define_insn "*negdf2_1" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 "register_operand" "0")))] + "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*negxf2_1" + [(set (match_operand:XF 0 "register_operand" "=f") + (neg:XF (match_operand:XF 1 "register_operand" "0")))] + "TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +(define_insn "*abssf2_1" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "0")))] + "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "SF")]) + +(define_insn "*absdf2_1" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "register_operand" "0")))] + "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*absxf2_1" + [(set (match_operand:XF 0 "register_operand" "=f") + (abs:XF (match_operand:XF 1 "register_operand" "0")))] + "TARGET_80387" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*negextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*negextenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (neg:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +(define_insn "*negextendsfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (neg:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +(define_insn "*absextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*absextenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (abs:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_80387" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +(define_insn "*absextendsfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (abs:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +;; One complement instructions + +(define_expand "one_cmpldi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_unary_operator (NOT, DImode, operands); DONE;") + +(define_insn "*one_cmpldi2_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, 
operands)" + "not{q}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "DI")]) + +(define_insn "*one_cmpldi2_2_rex64" + [(set (reg FLAGS_REG) + (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_dup 1)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, DImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:DI (match_operand:DI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "nonimmediate_operand" "") + (not:DI (match_dup 3)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 + [(xor:DI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:DI (match_dup 3) (const_int -1)))])] + "") + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + "" + "ix86_expand_unary_operator (NOT, SImode, operands); DONE;") + +(define_insn "*one_cmplsi2_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))] + "ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; ??? Currently never generated - xor is used instead. +(define_insn "*one_cmplsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_operand:SI 1 "register_operand" "0"))))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*one_cmplsi2_2" + [(set (reg FLAGS_REG) + (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (not:SI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:SI 1 "nonimmediate_operand" "") + (not:SI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:SI (match_dup 3) (const_int -1)))])] + "") + +;; ??? Currently never generated - xor is used instead. 
+(define_insn "*one_cmplsi2_2_zext" + [(set (reg FLAGS_REG) + (compare (not:SI (match_operand:SI 1 "register_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_dup 1))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "register_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "register_operand" "") + (zero_extend:DI (not:SI (match_dup 3))))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])] + "") + +(define_expand "one_cmplhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_unary_operator (NOT, HImode, operands); DONE;") + +(define_insn "*one_cmplhi2_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))] + "ix86_unary_operator_ok (NOT, HImode, operands)" + "not{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_insn "*one_cmplhi2_2" + [(set (reg FLAGS_REG) + (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (not:HI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NEG, HImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "HI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:HI (match_operand:HI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:HI 1 "nonimmediate_operand" "") + (not:HI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:HI (match_dup 3) (const_int -1)))])] + "") + +;; %%% Potential partial reg stall on alternative 1. What to do? 
+(define_expand "one_cmplqi2" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_unary_operator (NOT, QImode, operands); DONE;") + +(define_insn "*one_cmplqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))] + "ix86_unary_operator_ok (NOT, QImode, operands)" + "@ + not{b}\t%0 + not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI,SI")]) + +(define_insn "*one_cmplqi2_2" + [(set (reg FLAGS_REG) + (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (not:QI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, QImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:QI (match_operand:QI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:QI 1 "nonimmediate_operand" "") + (not:QI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:QI (match_dup 3) (const_int -1)))])] + "") + +;; Arithmetic shift instructions + +;; DImode shifts are implemented using the i386 "shift double" opcode, +;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count +;; is variable, then the count is in %cl and the "imm" operand is dropped +;; from the assembler input. +;; +;; This instruction shifts the target reg/mem as usual, but instead of +;; shifting in zeros, bits are shifted in from reg operand. If the insn +;; is a left shift double, bits are taken from the high order bits of +;; reg, else if the insn is a shift right double, bits are taken from the +;; low order bits of reg. So if %eax is "1234" and %edx is "5678", +;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345". +;; +;; Since sh[lr]d does not change the `reg' operand, that is done +;; separately, making all shifts emit pairs of shift double and normal +;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to +;; support a 63 bit shift, each shift where the count is in a reg expands +;; to a pair of shifts, a branch, a shift by 32 and a label. +;; +;; If the shift count is a constant, we need never emit more than one +;; shift pair, instead using moves and sign extension for counts greater +;; than 31. + +(define_expand "ashlti3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (! immediate_operand (operands[2], QImode)) + { + emit_insn (gen_ashlti3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (ASHIFT, TImode, operands); + DONE; +}) + +/* APPLE LOCAL begin 6440204 */ +/* Moved here from sse.md so this pattern gets recognized before ashlti3_2. Ugh. 
*/ +(define_insn "sse2_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "pslldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) +/* APPLE LOCAL end 6440204 */ + +(define_insn "ashlti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "c"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*ashlti3_2" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "O"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "nonmemory_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shld" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:DI (ashift:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "J,c")) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "@ + shld{q}\t{%2, %1, %0|%0, %1, %2} + shld{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector")]) + +(define_expand "x86_64_shift_adj" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 64)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 3 "register_operand" "r") + (match_dup 1)))] + "TARGET_64BIT" + "") + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;") + +(define_insn "*ashldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cJ,M"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{q}\t{%0, %0|%0, %0}"; + + case TYPE_LEA: + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + gcc_assert ((unsigned 
HOST_WIDE_INT) INTVAL (operands[2]) <= 3); + operands[1] = gen_rtx_MULT (DImode, operands[1], + GEN_INT (1 << INTVAL (operands[2]))); + return "lea{q}\t{%a1, %0|%0, %a1}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "index_register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (mult:DI (match_dup 1) + (match_dup 2)))] + "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);") + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashldi3_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "e")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashift:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +(define_insn "*ashldi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "e")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + 
(match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +(define_insn "*ashldi3_1" + [(set (match_operand:DI 0 "register_operand" "=&r,r") + (ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0") + (match_operand:QI 2 "nonmemory_operand" "Jc,Jc"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], DImode); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;") + +(define_insn "x86_shld_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:SI (ashift:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "I,c")) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "" + "@ + shld{l}\t{%2, %1, %0|%0, %1, %2} + shld{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "x86_shift_adj_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 32)) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:SI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:SI 3 "register_operand" "r") + (match_dup 1)))] + "TARGET_CMOVE" + "") + +(define_expand "x86_shift_adj_2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + ix86_expand_clear (operands[1]); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;") + +(define_insn "*ashlsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" 
"=rm,r") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{l}\t{%0, %0|%0, %0}"; + + case TYPE_LEA: + return "#"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "index_register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4" + [(const_int 0)] +{ + rtx pat; + enum machine_mode mode = GET_MODE (operands[0]); + + if (GET_MODE_SIZE (mode) < 4) + operands[0] = gen_lowpart (SImode, operands[0]); + if (mode != Pmode) + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); + + pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; Rare case of shifting RSP is handled by generating move and shift +(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat, clob; + emit_move_insn (operands[0], operands[1]); + pat = gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFT (GET_MODE (operands[0]), + operands[0], operands[2])); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob))); + DONE; +}) + +(define_insn "*ashlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t{%k0, %k0|%k0, %k0}"; + + case TYPE_LEA: + return "#"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" 
"SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) (zero_extend:DI + (subreg:SI (mult:SI (match_dup 1) + (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); +}) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashlsi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashift:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == 
const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t{%k0, %k0|%k0, %k0}"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_expand "ashlhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;") + +(define_insn "*ashlhi3_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI,SI")]) + +(define_insn "*ashlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
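The reason the _cmp/_cconly patterns insist on a constant count: a variable-count `sal %cl, ...` with CL == 0 leaves EFLAGS untouched, so a compare folded into the shift would read stale flags. A C sketch of the two situations (illustrative only):

static int shift_test_const (int x)
{
  return (x << 3) == 0;    /* "sall $3" sets ZF; the compare can fold */
}

static int shift_test_var (int x, int c)
{
  return (x << c) == 0;    /* if c may be 0 the shift's flags are
                              unreliable, so a separate test is needed */
}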
+(define_insn "*ashlhi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +(define_insn "*ashlhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +(define_expand "ashlqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+ +(define_insn "*ashlqi3_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t{%k0, %k0|%k0, %k0}"; + else + return "add{b}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else + return "sal{b}\t{%b2, %0|%0, %b2}"; + } + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI,SI,SI")]) + +(define_insn "*ashlqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "cI,cI"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t{%k0, %k0|%k0, %k0}"; + else + return "add{b}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else + return "sal{b}\t{%b2, %0|%0, %b2}"; + } + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI,SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashlqi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashift:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +(define_insn "*ashlqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +;; See comment above `ashldi3' about how this works. + +(define_expand "ashrti3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (! 
immediate_operand (operands[2], QImode)) + { + emit_insn (gen_ashrti3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (ASHIFTRT, TImode, operands); + DONE; +}) + +(define_insn "ashrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "c"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*ashrti3_2" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "O"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shrd" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:DI (ashiftrt:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "J,c")) + (ashift:DI (match_operand:DI 1 "register_operand" "r,r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "@ + shrd{q}\t{%2, %1, %0|%0, %1, %2} + shrd{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;") + +(define_insn "*ashrdi3_63_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0") + (match_operand:DI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && INTVAL (operands[2]) == 63 + && (TARGET_USE_CLTD || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "@ + {cqto|cqo} + sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 
"nonmemory_operand" "J,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "@ + sar{q}\t{%2, %0|%0, %2} + sar{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrdi3_one_bit_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrdi3_one_bit_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrdi3_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. 
+(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" + [(const_int 0)] + "ix86_split_ashr (operands, operands[3], DImode); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;") + +(define_insn "x86_shrd_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:SI (ashiftrt:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "I,c")) + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "" + "@ + shrd{l}\t{%2, %1, %0|%0, %1, %2} + shrd{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "pent_pair" "np") + (set_attr "mode" "SI")]) + +(define_expand "x86_shift_adj_3" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_insn "ashrsi3_31" + [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG))] + "INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + {cltd|cdq} + sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_31_zext" + [(set (match_operand:DI 0 "register_operand" "=*d,r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_CLTD || optimize_size) + && INTVAL (operands[2]) == 31 + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + {cltd|cdq} + sar{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;") + +(define_insn "*ashrsi3_1_one_bit" + [(set 
(match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "sar{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*ashrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + sar{l}\t{%2, %0|%0, %2} + sar{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + sar{l}\t{%2, %k0|%k0, %2} + sar{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrsi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*ashrsi3_one_bit_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrsi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "ashrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + 
(clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;") + +(define_insn "*ashrhi3_1_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "@ + sar{w}\t{%2, %0|%0, %2} + sar{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrhi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
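+;;
+;; Editorial note (not part of the original patch): the HImode shifts in
+;; this block exist only under TARGET_HIMODE_MATH, since 16-bit
+;; operations carry an operand-size prefix and can incur partial-register
+;; penalties, so most tunings prefer to widen to SImode.  An illustrative
+;; source line that may stay a 16-bit sar{w} when HImode math is enabled:
+;;
+;;   short f (short x) { return x >> 2; }
+;;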
+(define_insn "*ashrhi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_insn "*ashrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "ashrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;") + +(define_insn "*ashrqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "sar{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "@ + sar{b}\t{%2, %0|%0, %2} + sar{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*ashrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + sar{b}\t{%1, %0|%0, %1} + sar{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrqi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrqi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*ashrqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + + +;; Logical shift instructions + +;; See comment above `ashldi3' about how this works. + +(define_expand "lshrti3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (! immediate_operand (operands[2], QImode)) + { + emit_insn (gen_lshrti3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (LSHIFTRT, TImode, operands); + DONE; +}) + +(define_insn "lshrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "c"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; APPLE LOCAL begin mainline 5951842 +;; This pattern must be defined before *lshrti3_2 to prevent +;; combine pass from converting sse2_lshrti3 to *lshrti3_2. 
+ +(define_insn "sse2_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "psrldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end mainline 5951842 + +(define_insn "*lshrti3_2" + [(set (match_operand:TI 0 "register_operand" "=r") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "O"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_lshr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;") + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;") + +(define_insn "*lshrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{q}\t{%2, %0|%0, %2} + shr{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*lshrdi3_cmp_one_bit_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrdi3_cconly_one_bit_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrdi3_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "e")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*lshrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "e")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*lshrdi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" + [(const_int 0)] + "ix86_split_lshr (operands, operands[3], DImode); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
flow2_completed : reload_completed)" + [(const_int 0)] + "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;") + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;") + +(define_insn "*lshrsi3_1_one_bit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "shr{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*lshrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{l}\t{%2, %0|%0, %2} + shr{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{l}\t{%2, %k0|%k0, %2} + shr{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
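+;;
+;; Editorial note (not part of the original patch): the reason variable
+;; counts are excluded here is architectural: shr with %cl == 0 leaves
+;; EFLAGS untouched, so pairing it with a flags consumer would be wrong
+;; for that one case.  Illustrative C that may match the fused constant
+;; form below:
+;;
+;;   int f (unsigned x) { return (x >> 4) != 0; }  /* shrl $4 sets ZF */
+;;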
+(define_insn "*lshrsi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (lshiftrt:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*lshrsi3_cmp_one_bit_zext" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrsi3_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (lshiftrt:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "lshrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) 
+ (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;") + +(define_insn "*lshrhi3_1_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{w}\t{%2, %0|%0, %2} + shr{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrhi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*lshrhi3_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_insn "*lshrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "lshrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;") + +(define_insn "*lshrqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "shr{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "@ + shr{b}\t{%2, %0|%0, %2} + shr{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*lshrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + shr{b}\t{%1, %0|%0, %1} + shr{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*lshrqi2_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi2_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrqi2_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*lshrqi2_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands) + && (optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +;; Rotate instructions + +(define_expand "rotldi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATE, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. 
+(define_insn_and_split "ix86_rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashift:SI (match_dup 4) (match_dup 2)) + (lshiftrt:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashift:SI (match_dup 5) (match_dup 2)) + (lshiftrt:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (operands, 1, operands + 4, operands + 5);") + +(define_insn "*rotlsi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "rol{q}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "e,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)" + "@ + rol{q}\t{%2, %0|%0, %2} + rol{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "DI")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "" + "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;") + +(define_insn "*rotlsi3_1_one_bit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, SImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "rol{l}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "rol{l}\t%k0" + [(set_attr "type" "rotate") + (set_attr "length" "2")]) + +(define_insn "*rotlsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, SImode, operands)" + "@ + rol{l}\t{%2, %0|%0, %2} + rol{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_insn "*rotlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "0,0") + 
(match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)" + "@ + rol{l}\t{%2, %k0|%k0, %2} + rol{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;") + +(define_insn "*rotlhi3_1_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "rol{w}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, HImode, operands)" + "@ + rol{w}\t{%2, %0|%0, %2} + rol{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "HI")]) + +(define_expand "rotlqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;") + +(define_insn "*rotlqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "rol{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, QImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "rol{b}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size)
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+   rol{b}\t{%1, %0|%0, %1}
+   rol{b}\t{%b1, %0|%0, %b1}"
+  [(set_attr "type" "rotate1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*rotlqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+        (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+                   (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATE, QImode, operands)"
+  "@
+   rol{b}\t{%2, %0|%0, %2}
+   rol{b}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "QI")])
+
+(define_expand "rotrdi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+        (rotatert:DI (match_operand:DI 1 "shiftdi_operand" "")
+                     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+{
+  if (TARGET_64BIT)
+    {
+      ix86_expand_binary_operator (ROTATERT, DImode, operands);
+      DONE;
+    }
+  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    FAIL;
+  emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
+(define_insn_and_split "ix86_rotrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+       (rotatert:DI (match_operand:DI 1 "register_operand" "0")
+                    (match_operand:QI 2 "const_1_to_31_operand" "I")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+         (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
+                 (ashift:SI (match_dup 5)
+                            (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+         (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
+                 (ashift:SI (match_dup 3)
+                            (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands, 1, operands + 4, operands + 5);")
+
+(define_insn "*rotrdi3_1_one_bit_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+        (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)
+   && (TARGET_SHIFT1 || optimize_size)"
+  "ror{q}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+        (const_string "2")
+        (const_string "*")))])
+
+(define_insn "*rotrdi3_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+        (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+                     (match_operand:QI 2 "nonmemory_operand" "J,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
+  "@
+   ror{q}\t{%2, %0|%0, %2}
+   ror{q}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "DI")])
+
+(define_expand "rotrsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+        (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+                     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
+
+(define_insn "*rotrsi3_1_one_bit"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+        (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATERT, SImode, operands)
+   &&
(TARGET_SHIFT1 || optimize_size)" + "ror{l}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "ror{l}\t%k0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "@ + ror{l}\t{%2, %0|%0, %2} + ror{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_insn "*rotrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (rotatert:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "@ + ror{l}\t{%2, %k0|%k0, %2} + ror{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_expand "rotrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;") + +(define_insn "*rotrhi3_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, HImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "ror{w}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, HImode, operands)" + "@ + ror{w}\t{%2, %0|%0, %2} + ror{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "HI")]) + +(define_expand "rotrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;") + +(define_insn "*rotrqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, QImode, operands) + && (TARGET_SHIFT1 || optimize_size)" + "ror{b}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + 
(if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "ror{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, QImode, operands)" + "@ + ror{b}\t{%2, %0|%0, %2} + ror{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "QI")]) + +(define_insn "*rotrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + ror{b}\t{%1, %0|%0, %1} + ror{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") + (set_attr "mode" "QI")]) + +;; Bit set / bit test instructions + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] + "" +{ + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! ext_register_operand (operands[1], VOIDmode)) + FAIL; +}) + +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand 1 "ext_register_operand" "") + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] + "" +{ + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! ext_register_operand (operands[1], VOIDmode)) + FAIL; +}) + +(define_expand "insv" + [(set (zero_extract (match_operand 0 "ext_register_operand" "") + (match_operand 1 "const8_operand" "") + (match_operand 2 "const8_operand" "")) + (match_operand 3 "register_operand" ""))] + "" +{ + /* Handle insertions to %ah et al. */ + if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) + FAIL; + + /* From mips.md: insert_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! ext_register_operand (operands[0], VOIDmode)) + FAIL; + + if (TARGET_64BIT) + emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3])); + else + emit_insn (gen_movsi_insv_1 (operands[0], operands[3])); + + DONE; +}) + +;; %%% bts, btr, btc, bt. +;; In general these instructions are *slow* when applied to memory, +;; since they enforce atomic operation. When applied to registers, +;; it depends on the cpu implementation. 
They're never faster than +;; the corresponding and/ior/xor operations, so with 32-bit there's +;; no point. But in 64-bit, we can't hold the relevant immediates +;; within the instruction itself, so operating on bits in the high +;; 32-bits of a register becomes easier. +;; +;; These are slow on Nocona, but fast on Athlon64. We do require the use +;; of btrq and btcq for corner cases of post-reload expansion of absdf and +;; negdf respectively, so they can never be disabled entirely. + +(define_insn "*btsq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "bts{q} %1,%0" + [(set_attr "type" "alu1")]) + +(define_insn "*btrq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btr{q} %1,%0" + [(set_attr "type" "alu1")]) + +(define_insn "*btcq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btc{q} %1,%0" + [(set_attr "type" "alu1")]) + +;; Allow Nocona to avoid these instructions if a register is available. + +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; + + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); + + op1 = immed_double_const (lo, hi, DImode); + if (i >= 31) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } + + emit_insn (gen_iordi3 (operands[0], operands[0], op1)); + DONE; +}) + +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; + + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); + + op1 = immed_double_const (~lo, ~hi, DImode); + if (i >= 32) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } + + emit_insn (gen_anddi3 (operands[0], operands[0], op1)); + DONE; +}) + +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI + (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx 
op1; + + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); + + op1 = immed_double_const (lo, hi, DImode); + if (i >= 31) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } + + emit_insn (gen_xordi3 (operands[0], operands[0], op1)); + DONE; +}) + +;; Store-flag instructions. + +;; For all sCOND expanders, also expand the compare or test insn that +;; generates cc0. Generate an equality comparison if `seq' or `sne'. + +;; %%% Do the expansion to SImode. If PII, do things the xor+setcc way +;; to avoid partial register stalls. Otherwise do things the setcc+movzx +;; way, which can later delete the movzx if only QImode is needed. + +(define_expand "seq" + [(set (match_operand:QI 0 "register_operand" "") + (eq:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;") + +(define_expand "sne" + [(set (match_operand:QI 0 "register_operand" "") + (ne:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (NE, operands[0])) DONE; else FAIL;") + +(define_expand "sgt" + [(set (match_operand:QI 0 "register_operand" "") + (gt:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (GT, operands[0])) DONE; else FAIL;") + +(define_expand "sgtu" + [(set (match_operand:QI 0 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (GTU, operands[0])) DONE; else FAIL;") + +(define_expand "slt" + [(set (match_operand:QI 0 "register_operand" "") + (lt:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (LT, operands[0])) DONE; else FAIL;") + +(define_expand "sltu" + [(set (match_operand:QI 0 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (LTU, operands[0])) DONE; else FAIL;") + +(define_expand "sge" + [(set (match_operand:QI 0 "register_operand" "") + (ge:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (GE, operands[0])) DONE; else FAIL;") + +(define_expand "sgeu" + [(set (match_operand:QI 0 "register_operand" "") + (geu:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (GEU, operands[0])) DONE; else FAIL;") + +(define_expand "sle" + [(set (match_operand:QI 0 "register_operand" "") + (le:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (LE, operands[0])) DONE; else FAIL;") + +(define_expand "sleu" + [(set (match_operand:QI 0 "register_operand" "") + (leu:QI (reg:CC FLAGS_REG) (const_int 0)))] + "" + "if (ix86_expand_setcc (LEU, operands[0])) DONE; else FAIL;") + +(define_expand "sunordered" + [(set (match_operand:QI 0 "register_operand" "") + (unordered:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNORDERED, operands[0])) DONE; else FAIL;") + +(define_expand "sordered" + [(set (match_operand:QI 0 "register_operand" "") + (ordered:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387" + "if (ix86_expand_setcc (ORDERED, operands[0])) DONE; else FAIL;") + +(define_expand "suneq" + [(set (match_operand:QI 0 "register_operand" "") + (uneq:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNEQ, operands[0])) DONE; else FAIL;") + +(define_expand "sunge" + [(set (match_operand:QI 0 "register_operand" "") + (unge:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if 
(ix86_expand_setcc (UNGE, operands[0])) DONE; else FAIL;") + +(define_expand "sungt" + [(set (match_operand:QI 0 "register_operand" "") + (ungt:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNGT, operands[0])) DONE; else FAIL;") + +(define_expand "sunle" + [(set (match_operand:QI 0 "register_operand" "") + (unle:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNLE, operands[0])) DONE; else FAIL;") + +(define_expand "sunlt" + [(set (match_operand:QI 0 "register_operand" "") + (unlt:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNLT, operands[0])) DONE; else FAIL;") + +(define_expand "sltgt" + [(set (match_operand:QI 0 "register_operand" "") + (ltgt:QI (reg:CC FLAGS_REG) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (LTGT, operands[0])) DONE; else FAIL;") + +(define_insn "*setcc_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (match_operator:QI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "" + "set%C1\t%0" + [(set_attr "type" "setcc") + (set_attr "mode" "QI")]) + +(define_insn "*setcc_2" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (match_operator:QI 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "" + "set%C1\t%0" + [(set_attr "type" "setcc") + (set_attr "mode" "QI")]) + +;; In general it is not safe to assume too much about CCmode registers, +;; so simplify-rtx stops when it sees a second one. Under certain +;; conditions this is safe on x86, so help combine not create +;; +;; seta %al +;; testb %al, %al +;; sete %al + +(define_split + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ne:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + PUT_MODE (operands[1], QImode); +}) + +(define_split + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (ne:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + PUT_MODE (operands[1], QImode); +}) + +(define_split + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (eq:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + rtx new_op1 = copy_rtx (operands[1]); + operands[1] = new_op1; + PUT_MODE (new_op1, QImode); + PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! ix86_comparison_operator (new_op1, VOIDmode)) + FAIL; +}) + +(define_split + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (eq:QI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + rtx new_op1 = copy_rtx (operands[1]); + operands[1] = new_op1; + PUT_MODE (new_op1, QImode); + PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! 
ix86_comparison_operator (new_op1, VOIDmode)) + FAIL; +}) + +;; The SSE store flag instructions save 0 or 0xffffffff to the result; +;; subsequent logical operations are used to imitate conditional moves. +;; 0xffffffff is NaN, but not in normalized form, so we can't represent +;; it directly. + +(define_insn "*sse_setccsf" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 1 "sse_comparison_operator" + [(match_operand:SF 2 "register_operand" "0") + (match_operand:SF 3 "nonimmediate_operand" "xm")]))] + "TARGET_SSE" + "cmp%D1ss\t{%3, %0|%0, %3}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) + +(define_insn "*sse_setccdf" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=x") + (match_operator:DF 1 "sse_comparison_operator" + [(match_operand:DF 2 "register_operand" "0") + (match_operand:DF 3 "nonimmediate_operand" "xm")]))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2" + "cmp%D1sd\t{%3, %0|%0, %3}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +;; Basic conditional jump instructions. +;; We ignore the overflow flag for signed branch instructions. + +;; For all bCOND expanders, also expand the compare or test insn that +;; generates reg FLAGS_REG. Generate an equality comparison if `beq' or `bne'. + +(define_expand "beq" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (EQ, operands[0]); DONE;") + +(define_expand "bne" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (NE, operands[0]); DONE;") + +(define_expand "bgt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (GT, operands[0]); DONE;") + +(define_expand "bgtu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (GTU, operands[0]); DONE;") + +(define_expand "blt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (LT, operands[0]); DONE;") + +(define_expand "bltu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (LTU, operands[0]); DONE;") + +(define_expand "bge" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (GE, operands[0]); DONE;") + +(define_expand "bgeu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (GEU, operands[0]); DONE;") + +(define_expand "ble" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (LE, operands[0]); DONE;") + +(define_expand "bleu" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "ix86_expand_branch (LEU, operands[0]); DONE;") + +(define_expand "bunordered" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (UNORDERED, operands[0]); DONE;") + +(define_expand "bordered" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (ORDERED, operands[0]); DONE;") + +(define_expand "buneq" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + 
(pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (UNEQ, operands[0]); DONE;") + +(define_expand "bunge" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (UNGE, operands[0]); DONE;") + +(define_expand "bungt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (UNGT, operands[0]); DONE;") + +(define_expand "bunle" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (UNLE, operands[0]); DONE;") + +(define_expand "bunlt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (UNLT, operands[0]); DONE;") + +(define_expand "bltgt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (LTGT, operands[0]); DONE;") + +(define_insn "*jcc_1" + [(set (pc) + (if_then_else (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "%+j%C1\t%l0" + [(set_attr "type" "ibr") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 6)))]) + +(define_insn "*jcc_2" + [(set (pc) + (if_then_else (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "%+j%c1\t%l0" + [(set_attr "type" "ibr") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 6)))]) + +;; In general it is not safe to assume too much about CCmode registers, +;; so simplify-rtx stops when it sees a second one. Under certain +;; conditions this is safe on x86, so help combine not create +;; +;; seta %al +;; testb %al, %al +;; je Lfoo + +(define_split + [(set (pc) + (if_then_else (ne (match_operator 0 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) + (if_then_else (match_dup 0) + (label_ref (match_dup 1)) + (pc)))] +{ + PUT_MODE (operands[0], VOIDmode); +}) + +(define_split + [(set (pc) + (if_then_else (eq (match_operator 0 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) + (if_then_else (match_dup 0) + (label_ref (match_dup 1)) + (pc)))] +{ + rtx new_op0 = copy_rtx (operands[0]); + operands[0] = new_op0; + PUT_MODE (new_op0, VOIDmode); + PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0), + GET_MODE (XEXP (new_op0, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! ix86_comparison_operator (new_op0, VOIDmode)) + FAIL; +}) + +;; Define combination compare-and-branch fp compare instructions to use +;; during early optimization. Splitting the operation apart early makes +;; for bad code when we want to reverse the operation. 
+ +(define_insn "*fp_jcc_1_mixed" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_1_sse" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_1_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_CMOVE && TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_2_mixed" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_2_sse" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_2_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_CMOVE && TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_3_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE 
(GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_4_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_5_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_6_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_7_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "X")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +;; The order of operands in *fp_jcc_8_387 is forced by combine in +;; the simplify_comparison () function. The float operator is treated as +;; RTX_OBJ, which takes precedence over other operators, and is always +;; placed first. Swap the condition and operands to match the ficom +;; instruction. 
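For illustration (hypothetical example, not part of the patch), the pattern defined just below matches a comparison whose first operand is an integer converted to floating point, in the canonical order combine produces per the note above; whether ficom is actually selected depends on the usual x87 tuning options:

    /* X87MODEI12 covers HImode and SImode, so loading and converting a
       short for comparison against a double may be folded into ficoms
       under -m32 x87 math.  */
    int le_mixed (short *p, double d)
    {
      return (double) *p <= d;
    }
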
+ +(define_insn "*fp_jcc_8<mode>_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) + (match_operand 3 "register_operand" "f,f")]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a,a"))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP + && FLOAT_MODE_P (GET_MODE (operands[3])) + && GET_MODE (operands[1]) == GET_MODE (operands[3]) + && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0]))) + && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode + && ix86_fp_jump_nontrivial_p (swap_condition (GET_CODE (operands[0])))" + "#") + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "nonimmediate_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], NULL_RTX, NULL_RTX); + DONE; +}) + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "general_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a"))] + "reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], operands[5], NULL_RTX); + DONE; +}) + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]); + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], NULL_RTX); + DONE; +}) + +;; %%% Kill this when reload knows how to do it. 
+(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "register_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]); + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], operands[2]); + DONE; +}) + +;; Unconditional and other jump instructions + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "jmp\t%l0" + [(set_attr "type" "ibr") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 5))) + (set_attr "modrm" "0")]) + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] + "" + "") + +(define_insn "*indirect_jump" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] + "!TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "*indirect_jump_rtx64" + [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))] + "TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. Convert the relative address to an absolute address. */ + if (flag_pic) + { + rtx op0, op1; + enum rtx_code code; + + if (TARGET_64BIT) + { + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) + { + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; + } + else + { + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; + } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); + } +}) + +(define_insn "*tablejump_1" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))] + "!TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "*tablejump_1_rtx64" + [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +;; Convert setcc + movzbl to xor + setcc if operands don't overlap. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1)))] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! 
reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Similar, but match zero_extendhisi2_and, which adds a clobber. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (parallel [(set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Call instructions. + +;; The predicates normally associated with named expanders are not properly +;; checked for calls. This is a bug in the generic code, but it isn't that +;; easy to fix. Ignore it for now and be prepared to fix things up. + +;; Call subroutine returning no value. + +(define_expand "call_pop" + [(parallel [(call (match_operand:QI 0 "" "") + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "" "")))])] + "!TARGET_64BIT" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0); + DONE; +}) + +(define_insn "*call_pop_0" + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_pop_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))] + "!TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], Pmode)) + { + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; + } + if (SIBLING_CALL_P (insn)) + return "jmp\t%A0"; + else + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_expand "call" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0); + DONE; +}) + +(define_expand "sibcall" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1); + DONE; +}) + +(define_insn "*call_0" + [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] + "" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" ""))] + "!SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + 
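For illustration (not part of the patch; the function names are hypothetical), the %P0 and %A0 output templates in these call patterns correspond to direct and indirect calls:

    /* A constant_call_address_operand prints via %P0 as a direct
       "call func"; anything else prints via %A0 as an indirect
       "call *%reg" or "call *mem".  */
    extern void func (void);
    void caller (void (*fp) (void))
    {
      func ();   /* direct:   call func   */
      fp ();     /* indirect: call *...   */
    }
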
+(define_insn "*sibcall_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "jmp\t%P0"; + return "jmp\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" ""))] + "!SIBLING_CALL_P (insn) && TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64" + [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t%P0" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64_v" + [(call (mem:QI (reg:DI 40)) + (match_operand 0 "" ""))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t*%%r11" + [(set_attr "type" "call")]) + + +;; Call subroutine, returning value in operand 0 + +(define_expand "call_value_pop" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 4 "" "")))])] + "!TARGET_64BIT" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4], 0); + DONE; +}) + +(define_expand "call_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0); + DONE; +}) + +(define_expand "sibcall_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1); + DONE; +}) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" +{ + int i; + + /* In order to give reg-stack an easier job in validating two + coprocessor registers as containing a possible return value, + simply pretend the untyped call returns a complex long double + value. */ + + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1), + NULL, 0); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage (const0_rtx)); + + DONE; +}) + +;; Prologue and epilogue instructions + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Insn emitted into the body of a function to return from a function. +;; This is only done if the function's epilogue is known to be simple. 
;; See comments for ix86_can_use_return_insn_p in i386.c. + +(define_expand "return" + [(return)] + "ix86_can_use_return_insn_p ()" +{ + if (current_function_pops_args) + { + rtx popc = GEN_INT (current_function_pops_args); + emit_jump_insn (gen_return_pop_internal (popc)); + DONE; + } +}) + +(define_insn "return_internal" + [(return)] + "reload_completed" + "ret" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Used by x86_machine_dependent_reorg to avoid the penalty that Athlon +;; and K8 incur on a single-byte RET instruction. + +(define_insn "return_internal_long" + [(return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep {;} ret" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + +(define_insn "return_pop_internal" + [(return) + (use (match_operand:SI 0 "const_int_operand" ""))] + "reload_completed" + "ret\t%0" + [(set_attr "length" "3") + (set_attr "length_immediate" "2") + (set_attr "modrm" "0")]) + +(define_insn "return_indirect_internal" + [(return) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Align to 16-byte boundary, max skip in op0. Used to avoid +;; branch prediction penalty for the third jump in a 16-byte +;; block on K8. + +(define_insn "align" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)] + "" +{ +#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN + ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0])); +#else + /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. + The align insn is used to avoid three jump instructions in a row, which + improves branch prediction; that benefit hardly outweighs the cost of the + eight extra nops, on average, that a full alignment pseudo-op would insert. */ +#endif + return ""; +} + [(set_attr "length" "16")]) + +(define_expand "prologue" + [(const_int 1)] + "" + "ix86_expand_prologue (); DONE;") + +(define_insn "set_got" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + { return output_set_got (operands[0], NULL_RTX); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_labelled" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + { return output_set_got (operands[0], operands[1]); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] + "TARGET_64BIT" + "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + +(define_expand "epilogue" + [(const_int 1)] + "" + "ix86_expand_epilogue (1); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 1)] + "" + "ix86_expand_epilogue (0); DONE;") + +(define_expand "eh_return" + [(use (match_operand 0 "register_operand" ""))] + "" +{ + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; + + /* Tricky bit: we write the address of the handler to which we will + be returning into someone else's stack frame, one word below the + stack address we wish to restore. 
*/ + tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa); + tmp = plus_constant (tmp, -UNITS_PER_WORD); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (tmp, ra); + + if (Pmode == SImode) + emit_jump_insn (gen_eh_return_si (sa)); + else + emit_jump_insn (gen_eh_return_di (sa)); + emit_barrier (); + DONE; +}) + +(define_insn_and_split "eh_return_si" + [(set (pc) + (unspec [(match_operand:SI 0 "register_operand" "c")] + UNSPEC_EH_RETURN))] + "!TARGET_64BIT" + "#" + "reload_completed" + [(const_int 1)] + "ix86_expand_epilogue (2); DONE;") + +(define_insn_and_split "eh_return_di" + [(set (pc) + (unspec [(match_operand:DI 0 "register_operand" "c")] + UNSPEC_EH_RETURN))] + "TARGET_64BIT" + "#" + "reload_completed" + [(const_int 1)] + "ix86_expand_epilogue (2); DONE;") + +(define_insn "leave" + [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4))) + (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +(define_insn "leave_rex64" + [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8))) + (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +(define_expand "ffssi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_scratch:SI 2 "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn_and_split "*ffs_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:SI + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn_and_split "*ffs_no_cmove" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[3] = gen_lowpart (QImode, operands[2]); + ix86_expand_clear (operands[2]); +}) + +(define_insn "*ffssi_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_dup 1)))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_expand "ffsdi2" + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (ffs:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_CMOVE" + "") + 
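For illustration (not part of the patch; register choice is arbitrary), the cmove-based split above computes ffs() roughly as the commented sequence shows; the DImode twin *ffs_rex64 below has the same shape:

    /* bsf leaves its destination undefined and sets ZF when the input
       is zero, so a scratch preloaded with -1 is conditionally moved in
       for that case, and the final add makes the result 1-based:
           movl  $-1, %edx
           bsfl  %edi, %eax
           cmove %edx, %eax
           addl  $1, %eax   */
    int ffs_wrapper (int x) { return __builtin_ffs (x); }
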
+(define_insn_and_split "*ffs_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:DI 2 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:DI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:DI + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "*ffsdi_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_dup 1)))] + "TARGET_64BIT" + "bsf{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_insn "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsf{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_expand "clzsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) +;; APPLE LOCAL begin mainline bswap +(define_insn "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BSWAP" + "bswap\t%k0" + [(set_attr "prefix_0f" "1") + (set_attr "length" "2")]) + +(define_insn "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (bswap:DI (match_operand:DI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_BSWAP" + "bswap\t%0" + [(set_attr "prefix_0f" "1") + (set_attr "length" "3")]) +;; APPLE LOCAL end mainline bswap + +(define_expand "clzdi2" + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*bsr_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsr{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation. 
+ +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "*tls_global_dynamic_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]} + push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop" + [(set_attr "type" "multi") + (set_attr "length" "14")]) + +(define_expand "tls_global_dynamic_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(match_dup 2) + (match_operand:SI 1 "tls_symbolic_operand" "") + (match_dup 3)] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[2] = pic_offset_table_rtx; + else + { + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[2])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], operands[1], operands[2])); + DONE; + } + operands[3] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_global_dynamic_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" + ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +(define_expand "tls_global_dynamic_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 2)) (const_int 0))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +(define_insn "*tls_local_dynamic_base_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]} + push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3" 
+ [(set_attr "type" "multi") + (set_attr "length" "13")]) + +(define_expand "tls_local_dynamic_base_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 2)] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[1] = pic_offset_table_rtx; + else + { + operands[1] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[1])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], ix86_tls_module_base (), operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" "")) + (match_operand:DI 2 "" ""))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_expand "tls_local_dynamic_base_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 1)) (const_int 0))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], ix86_tls_module_base ())); + DONE; + } + operands[1] = ix86_tls_get_addr (); +}) + +;; Local dynamic of a single variable is a lose. Show combine how +;; to convert that back to global dynamic. + +(define_insn_and_split "*tls_local_dynamic_32_once" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE) + (const:SI (unspec:SI + [(match_operand:SI 3 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] + UNSPEC_TLS_GD)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; Load and add the thread base pointer from %gs:0. 
+ +(define_insn "*load_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "!TARGET_64BIT" + "mov{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand:SI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "add{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*load_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_64BIT" + "mov{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP) + (match_operand:DI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "add{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +;; GNU2 TLS patterns can be split. + +(define_expand "tls_dynamic_gnu2_32" + [(set (match_dup 3) + (plus:SI (match_operand:SI 2 "register_operand" "") + (const:SI + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)))) + (parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 3) + (match_dup 2) (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[3] = no_new_pseudos ? 
operands[0] : gen_reg_rtx (Pmode); + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "b") + (const:SI + (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}" + [(set_attr "type" "lea") + (set_attr "mode" "SI") + (set_attr "length" "6") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "") + (match_operand:SI 2 "register_operand" "0") + ;; we have to make sure %ebx still points to the GOT + (match_operand:SI 3 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_32" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI + (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "") + (match_operand:SI 4 "" "") + (match_operand:SI 2 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC) + (const:SI (unspec:SI + [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 5))] +{ + operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); +}) + +(define_expand "tls_dynamic_gnu2_64" + [(set (match_dup 2) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)) + (parallel + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[2] = no_new_pseudos ? 
operands[0] : gen_reg_rtx (Pmode);
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+		    (match_operand:DI 2 "register_operand" "0")
+		    (reg:DI SP_REG)]
+		   UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+  [(set (match_operand:DI 0 "register_operand" "=&a")
+	(plus:DI
+	 (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+		     (match_operand:DI 3 "" "")
+		     (reg:DI SP_REG)]
+		    UNSPEC_TLSDESC)
+	 (const:DI (unspec:DI
+		    [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 4))]
+{
+  operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;;
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3.  There are three patterns for each of DFmode and
+;; SFmode.  The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion.  The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target
+;; mode is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address instructions,
+;; so use special patterns for add and mul.
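+
+;; Editor's note (illustrative sketch, not part of the original patch):
+;; the conversion variants let the combiner fold an integer-to-float
+;; conversion into the arithmetic insn itself.  With 387 math, a C
+;; function such as
+;;
+;;   double f (double x, int n) { return x + n; }
+;;
+;; can, when TARGET_USE_*_FIOP allows integer operands, match one of the
+;; *fop_*_2/_3 patterns below and emit a single memory-operand
+;;   fiaddl  <mem>
+;; instead of a separate fild/faddp pair.  (The function and operand
+;; names here are hypothetical.)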
+ +(define_insn "*fop_sf_comm_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "%0,0") + (match_operand:SF 2 "nonimmediate_operand" "fm,xm")]))] + "TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_comm_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_comm_i387" + [(set (match_operand:SF 0 "register_operand" "=f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_1_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,f,x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm")]))] + "TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:SF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:SF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_1_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:SF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "mode" "SF")]) + +;; This pattern is not fully shadowed by the pattern above. 
+(define_insn "*fop_sf_1_i387" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0,fm") + (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && !TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +;; ??? Add SSE splitters for these! +(define_insn "*fop_sf_2<mode>_i387" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 3 "binary_fp_operator" + [(float:SF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:SF 2 "register_operand" "0,0")]))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fop_sf_3<mode>_i387" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "0,0") + (float:SF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fop_df_comm_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=f,x") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "%0,0") + (match_operand:DF 2 "nonimmediate_operand" "fm,Ym")]))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:DF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_df_comm_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=x") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "xm")]))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:DF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "mode" "DF")]) + 
+(define_insn "*fop_df_comm_i387" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_df_1_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=f,f,x") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:DF 2 "nonimmediate_operand" "fm,0,xm")]))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:DF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:DF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_df_1_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=x") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")]))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set_attr "mode" "DF") + (set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:DF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd")))]) + +;; This pattern is not fully shadowed by the pattern above. +(define_insn "*fop_df_1_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0,fm") + (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !COMMUTATIVE_ARITH_P (operands[3]) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + +;; ??? Add SSE splitters for these! +(define_insn "*fop_df_2<mode>_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float:DF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:DF 2 "register_operand" "0,0")]))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP + && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fop_df_3<mode>_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,0") + (float:DF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP + && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fop_df_4_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + (match_operand:DF 2 "register_operand" "0,f")]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_5_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,f") + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_6_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "register_operand" "0,f")) + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_xf_comm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "%0") + (match_operand:XF 2 "register_operand" "f")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_1_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (match_operand:XF 2 "register_operand" "f,0")]))] + "TARGET_80387 + 
&& !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_2<mode>_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float:XF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:XF 2 "register_operand" "0,0")]))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fop_xf_3<mode>_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,0") + (float:XF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fop_xf_4_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF (match_operand 1 "nonimmediate_operand" "fm,0")) + (match_operand:XF 2 "register_operand" "0,f")]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_xf_5_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (float_extend:XF + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_xf_6_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand 1 "register_operand" "0,f")) + (float_extend:XF + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(float (match_operand:X87MODEI12 1 "register_operand" "")) + (match_operand 2 "register_operand" "")]))] + "TARGET_80387 && reload_completed + && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + 
operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[4], + operands[2]))); + ix86_free_from_memory (GET_MODE (operands[1])); + DONE; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(match_operand 1 "register_operand" "") + (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] + "TARGET_80387 && reload_completed + && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[1], + operands[4]))); + ix86_free_from_memory (GET_MODE (operands[2])); + DONE; +}) + +;; FPU special functions. + +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_USE_FANCY_MATH_387 || TARGET_SSE_MATH" +{ + if (!TARGET_SSE_MATH) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*sqrtsf2_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,x") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm")))] + "TARGET_USE_FANCY_MATH_387 && TARGET_MIX_SSE_I387" + "@ + fsqrt + sqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "fpspc,sse") + (set_attr "mode" "SF,SF") + (set_attr "athlon_decode" "direct,*")]) + +(define_insn "*sqrtsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH" + "sqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "*")]) + +(define_insn "*sqrtsf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "register_operand" "0")))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "direct")]) + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "register_operand" "") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_USE_FANCY_MATH_387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + if (!(TARGET_SSE2 && TARGET_SSE_MATH)) + operands[1] = force_reg (DFmode, operands[1]); +}) + +(define_insn "*sqrtdf2_mixed" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=f,x") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0,xm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_USE_FANCY_MATH_387 && TARGET_SSE2 && TARGET_MIX_SSE_I387" + "@ + fsqrt + sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "fpspc,sse") + (set_attr "mode" "DF,DF") + (set_attr "athlon_decode" "direct,*")]) + +(define_insn "*sqrtdf2_sse" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:DF 0 "register_operand" "=x") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "xm")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2 && TARGET_SSE_MATH" + "sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "*")]) + +(define_insn "*sqrtdf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f") + (sqrt:DF (match_operand:DF 1 "register_operand" "0")))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + 
[(set_attr "type" "fpspc") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "*sqrtextendsfdf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f") + (sqrt:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "sqrtxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "*sqrtextendsfxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "*sqrtextenddfxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "fpremxf4" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(const_int 0)] UNSPEC_NOP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fprem" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "fmodsf3" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" "")) + (use (match_operand:SF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn(gen_extendsfxf2 (op1, operands[1])); + emit_insn(gen_extendsfxf2 (op2, operands[2])); + + emit_label (label); + + emit_insn (gen_fpremxf4 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op1)); + DONE; +}) + +(define_expand "fmoddf3" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" "")) + (use (match_operand:DF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_extenddfxf2 (op2, operands[2])); + + emit_label (label); + + emit_insn (gen_fpremxf4 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op1)); + DONE; +}) + +(define_expand "fmodxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" "")) + (use (match_operand:XF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx label = gen_label_rtx (); + + 
emit_label (label); + + emit_insn (gen_fpremxf4 (operands[1], operands[2], + operands[1], operands[2])); + ix86_emit_fp_unordered_jump (label); + + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_insn "fprem1xf4" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM1_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM1_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(const_int 0)] UNSPEC_NOP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fprem1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "dremsf3" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" "")) + (use (match_operand:SF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn(gen_extendsfxf2 (op1, operands[1])); + emit_insn(gen_extendsfxf2 (op2, operands[2])); + + emit_label (label); + + emit_insn (gen_fprem1xf4 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op1)); + DONE; +}) + +(define_expand "dremdf3" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" "")) + (use (match_operand:DF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_extenddfxf2 (op2, operands[2])); + + emit_label (label); + + emit_insn (gen_fprem1xf4 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op1)); + DONE; +}) + +(define_expand "dremxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" "")) + (use (match_operand:XF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx label = gen_label_rtx (); + + emit_label (label); + + emit_insn (gen_fprem1xf4 (operands[1], operands[2], + operands[1], operands[2])); + ix86_emit_fp_unordered_jump (label); + + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_insn "*sindf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_insn "*sinsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_insn "*sinextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(float_extend:DF + (match_operand:SF 1 "register_operand" "0"))] + UNSPEC_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || 
TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DF")])
+
+(define_insn "*sinxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cosdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DF")])
+
+(define_insn "*cossf2"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "SF")])
+
+(define_insn "*cosextendsfdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(unspec:DF [(float_extend:DF
+		     (match_operand:SF 1 "register_operand" "0"))]
+		   UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DF")])
+
+(define_insn "*cosxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; With the sincos pattern defined, the sin and cos builtin functions are
+;; expanded to the sincos pattern with one of its outputs left unused.
+;; The CSE pass will detect whether two sincos patterns can be combined;
+;; otherwise the sincos pattern is split back to a sin or cos pattern,
+;; depending on which output is unused.
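+
+;; Editor's note (illustrative, not part of the original patch): under
+;; the conditions above (387 math and -funsafe-math-optimizations), code
+;; that needs both results for the same argument, e.g.
+;;
+;;   void polar (double a, double *s, double *c)
+;;   { *s = __builtin_sin (a); *c = __builtin_cos (a); }
+;;
+;; expands each builtin to a sincos pattern with one output unused; CSE
+;; then merges the two identical patterns so a single fsincos computes
+;; both values, while a lone sin or cos is split back to fsin/fcos by
+;; the splitters below.  (Function names here are hypothetical.)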
+ +(define_insn "sincosdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 2 "register_operand" "0")] + UNSPEC_SINCOS_COS)) + (set (match_operand:DF 1 "register_operand" "=u") + (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_operand:DF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:DF 1 "register_operand" "") + (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 1) (unspec:DF [(match_dup 2)] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_operand:DF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:DF 1 "register_operand" "") + (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 0) (unspec:DF [(match_dup 2)] UNSPEC_COS))] + "") + +(define_insn "sincossf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 2 "register_operand" "0")] + UNSPEC_SINCOS_COS)) + (set (match_operand:SF 1 "register_operand" "=u") + (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:SF 1 "register_operand" "") + (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 1) (unspec:SF [(match_dup 2)] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:SF 1 "register_operand" "") + (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 0) (unspec:SF [(match_dup 2)] UNSPEC_COS))] + "") + +(define_insn "*sincosextendsfdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(float_extend:DF + (match_operand:SF 2 "register_operand" "0"))] + UNSPEC_SINCOS_COS)) + (set (match_operand:DF 1 "register_operand" "=u") + (unspec:DF [(float_extend:DF + (match_dup 2))] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(float_extend:DF + (match_operand:SF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:DF 1 "register_operand" "") + (unspec:DF [(float_extend:DF + (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 1) (unspec:DF [(float_extend:DF 
+ (match_dup 2))] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(float_extend:DF + (match_operand:SF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:DF 1 "register_operand" "") + (unspec:DF [(float_extend:DF + (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 0) (unspec:DF [(float_extend:DF + (match_dup 2))] UNSPEC_COS))] + "") + +(define_insn "sincosxf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !reload_completed && !reload_in_progress" + [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))] + "") + +(define_insn "*tandf3_1" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 2 "register_operand" "0")] + UNSPEC_TAN_ONE)) + (set (match_operand:DF 1 "register_operand" "=u") + (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +;; optimize sequence: fptan +;; fstp %st(0) +;; fld1 +;; into fptan insn. 
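+
+;; Editor's note (not part of the original patch): fptan replaces %st(0)
+;; with tan(%st(0)) and then pushes the constant 1.0, so after fptan the
+;; 1.0 that a following fld1 would load is already in %st(0).
+;; standard_80387_constant_p () returning 2 identifies that constant,
+;; letting the peephole below drop the redundant reload and keep the
+;; value fptan already produced.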
+ +(define_peephole2 + [(parallel[(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_operand:DF 2 "register_operand" "")] + UNSPEC_TAN_ONE)) + (set (match_operand:DF 1 "register_operand" "") + (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))]) + (set (match_dup 0) + (match_operand:DF 3 "immediate_operand" ""))] + "standard_80387_constant_p (operands[3]) == 2" + [(parallel[(set (match_dup 0) (unspec:DF [(match_dup 2)] UNSPEC_TAN_ONE)) + (set (match_dup 1) (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))])] + "") + +(define_expand "tandf2" + [(parallel [(set (match_dup 2) + (unspec:DF [(match_operand:DF 1 "register_operand" "")] + UNSPEC_TAN_ONE)) + (set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_dup 1)] UNSPEC_TAN_TAN))])] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (DFmode); +}) + +(define_insn "*tansf3_1" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 2 "register_operand" "0")] + UNSPEC_TAN_ONE)) + (set (match_operand:SF 1 "register_operand" "=u") + (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +;; optimize sequence: fptan +;; fstp %st(0) +;; fld1 +;; into fptan insn. + +(define_peephole2 + [(parallel[(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 2 "register_operand" "")] + UNSPEC_TAN_ONE)) + (set (match_operand:SF 1 "register_operand" "") + (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))]) + (set (match_dup 0) + (match_operand:SF 3 "immediate_operand" ""))] + "standard_80387_constant_p (operands[3]) == 2" + [(parallel[(set (match_dup 0) (unspec:SF [(match_dup 2)] UNSPEC_TAN_ONE)) + (set (match_dup 1) (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))])] + "") + +(define_expand "tansf2" + [(parallel [(set (match_dup 2) + (unspec:SF [(match_operand:SF 1 "register_operand" "")] + UNSPEC_TAN_ONE)) + (set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_dup 1)] UNSPEC_TAN_TAN))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (SFmode); +}) + +(define_insn "*tanxf3_1" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_TAN_ONE)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +;; optimize sequence: fptan +;; fstp %st(0) +;; fld1 +;; into fptan insn. 
+ +(define_peephole2 + [(parallel[(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_TAN_ONE)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))]) + (set (match_dup 0) + (match_operand:XF 3 "immediate_operand" ""))] + "standard_80387_constant_p (operands[3]) == 2" + [(parallel[(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_TAN_ONE)) + (set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))])] + "") + +(define_expand "tanxf2" + [(parallel [(set (match_dup 2) + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_TAN_ONE)) + (set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 1)] UNSPEC_TAN_TAN))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); +}) + +(define_insn "atan2df3_1" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 2 "register_operand" "0") + (match_operand:DF 1 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:DF 3 "=1"))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_expand "atan2df3" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 2 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx copy = gen_reg_rtx (DFmode); + emit_move_insn (copy, operands[1]); + emit_insn (gen_atan2df3_1 (operands[0], copy, operands[2])); + DONE; +}) + +(define_expand "atandf2" + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_dup 2) + (match_operand:DF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:DF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (DFmode); + emit_move_insn (operands[2], CONST1_RTX (DFmode)); /* fld1 */ +}) + +(define_insn "atan2sf3_1" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 2 "register_operand" "0") + (match_operand:SF 1 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:SF 3 "=1"))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_expand "atan2sf3" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 2 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx copy = gen_reg_rtx (SFmode); + emit_move_insn (copy, operands[1]); + emit_insn (gen_atan2sf3_1 (operands[0], copy, operands[2])); + DONE; +}) + +(define_expand "atansf2" + [(parallel [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_dup 2) + (match_operand:SF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:SF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (SFmode); + emit_move_insn (operands[2], CONST1_RTX (SFmode)); /* fld1 */ +}) + 
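+;; Editor's note (illustrative, not part of the original patch): the
+;; atan expanders above implement atan(x) as atan2(x, 1.0): operand 2 is
+;; preloaded with 1.0 (fld1) and fpatan computes atan(%st(1)/%st(0)).
+;; For example,
+;;
+;;   double g (double x) { return __builtin_atan (x); }
+;;
+;; compiles (387 math, -funsafe-math-optimizations) to roughly
+;;   fldl   x
+;;   fld1
+;;   fpatan
+;; (the function name "g" is hypothetical).
+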
+(define_insn "atan2xf3_1" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=1"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "atan2xf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 2 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx copy = gen_reg_rtx (XFmode); + emit_move_insn (copy, operands[1]); + emit_insn (gen_atan2xf3_1 (operands[0], copy, operands[2])); + DONE; +}) + +(define_expand "atanxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 2) + (match_operand:XF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "asindf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2))) + (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3))) + (set (match_dup 6) (sqrt:XF (match_dup 5))) + (parallel [(set (match_dup 7) + (unspec:XF [(match_dup 6) (match_dup 2)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 8 ""))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 7)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=2; i<8; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "asinsf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2))) + (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3))) + (set (match_dup 6) (sqrt:XF (match_dup 5))) + (parallel [(set (match_dup 7) + (unspec:XF [(match_dup 6) (match_dup 2)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 8 ""))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 7)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=2; i<8; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "asinxf2" + [(set (match_dup 2) + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 1))) + (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) + (set (match_dup 5) (sqrt:XF (match_dup 4))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 5) (match_dup 1)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 6 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=2; i<6; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "acosdf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2))) + (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3))) + (set 
(match_dup 6) (sqrt:XF (match_dup 5))) + (parallel [(set (match_dup 7) + (unspec:XF [(match_dup 2) (match_dup 6)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 8 ""))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 7)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=2; i<8; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "acossf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2))) + (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3))) + (set (match_dup 6) (sqrt:XF (match_dup 5))) + (parallel [(set (match_dup 7) + (unspec:XF [(match_dup 2) (match_dup 6)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 8 ""))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 7)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=2; i<8; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "acosxf2" + [(set (match_dup 2) + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 1))) + (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) + (set (match_dup 5) (sqrt:XF (match_dup 4))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 1) (match_dup 5)] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 6 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=2; i<6; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_insn "fyl2x_xf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 "=1"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logsf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (parallel [(set (match_dup 4) + (unspec:XF [(match_dup 2) + (match_dup 3)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 5 ""))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); + + temp = standard_80387_constant_rtx (4); /* fldln2 */ + emit_move_insn (operands[3], temp); +}) + +(define_expand "logdf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (parallel [(set (match_dup 4) + (unspec:XF [(match_dup 2) + (match_dup 3)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 5 ""))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); + + temp = 
standard_80387_constant_rtx (4); /* fldln2 */ + emit_move_insn (operands[3], temp); +}) + +(define_expand "logxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (4); /* fldln2 */ + emit_move_insn (operands[2], temp); +}) + +(define_expand "log10sf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (parallel [(set (match_dup 4) + (unspec:XF [(match_dup 2) + (match_dup 3)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 5 ""))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); + + temp = standard_80387_constant_rtx (3); /* fldlg2 */ + emit_move_insn (operands[3], temp); +}) + +(define_expand "log10df2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (parallel [(set (match_dup 4) + (unspec:XF [(match_dup 2) + (match_dup 3)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 5 ""))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); + + temp = standard_80387_constant_rtx (3); /* fldlg2 */ + emit_move_insn (operands[3], temp); +}) + +(define_expand "log10xf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (3); /* fldlg2 */ + emit_move_insn (operands[2], temp); +}) + +(define_expand "log2sf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (parallel [(set (match_dup 4) + (unspec:XF [(match_dup 2) + (match_dup 3)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 5 ""))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "log2df2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (parallel [(set (match_dup 4) + (unspec:XF [(match_dup 2) + (match_dup 3)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 5 ""))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); + + 
emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "log2xf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_insn "fyl2xp1_xf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FYL2XP1)) + (clobber (match_scratch:XF 3 "=1"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fyl2xp1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "log1psf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendsfxf2 (op1, operands[1])); + ix86_emit_i387_log1p (op0, op1); + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "log1pdf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extenddfxf2 (op1, operands[1])); + ix86_emit_i387_log1p (op0, op1); + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "log1pxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + ix86_emit_i387_log1p (operands[0], operands[1]); + DONE; +}) + +(define_insn "*fxtractxf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_XTRACT_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fxtract" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logbsf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (parallel [(set (match_dup 3) + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_FRACT)) + (set (match_dup 4) + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + operands[3] = gen_reg_rtx (XFmode); + operands[4] = gen_reg_rtx (XFmode); +}) + +(define_expand "logbdf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (parallel [(set (match_dup 3) + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_FRACT)) + (set (match_dup 4) + (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 4)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ 
+  operands[2] = gen_reg_rtx (XFmode);
+  operands[3] = gen_reg_rtx (XFmode);
+  operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "logbxf2"
+  [(parallel [(set (match_dup 2)
+                   (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+                              UNSPEC_XTRACT_FRACT))
+              (set (match_operand:XF 0 "register_operand" "")
+                   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ilogbsi2"
+  [(parallel [(set (match_dup 2)
+                   (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+                              UNSPEC_XTRACT_FRACT))
+              (set (match_operand:XF 3 "register_operand" "")
+                   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (fix:SI (match_dup 3)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  operands[3] = gen_reg_rtx (XFmode);
+})
+
+(define_insn "*f2xm1xf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   UNSPEC_F2XM1))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "f2xm1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*fscalexf4"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+                    (match_operand:XF 3 "register_operand" "1")]
+                   UNSPEC_FSCALE_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2) (match_dup 3)]
+                   UNSPEC_FSCALE_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fscale"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "expsf2"
+  [(set (match_dup 2)
+        (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+   (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+   (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+   (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+   (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
+   (parallel [(set (match_dup 10)
+                   (unspec:XF [(match_dup 9) (match_dup 5)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 11)
+                   (unspec:XF [(match_dup 9) (match_dup 5)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF (match_dup 10)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<12; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[3], temp);
+  emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expdf2"
+  [(set (match_dup 2)
+        (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+   (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+   (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+   (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+   (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
+   (parallel [(set (match_dup 10)
+                   (unspec:XF [(match_dup 9) (match_dup 5)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 11)
+                   (unspec:XF [(match_dup 9) (match_dup 5)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_operand:DF 0 "register_operand" "")
+        (float_truncate:DF (match_dup 10)))]
"TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<12; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (5); /* fldl2e */ + emit_move_insn (operands[3], temp); + emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "expxf2" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 2))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 9) + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<10; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (5); /* fldl2e */ + emit_move_insn (operands[2], temp); + emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "exp10sf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3))) + (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT)) + (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5))) + (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1)) + (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8))) + (parallel [(set (match_dup 10) + (unspec:XF [(match_dup 9) (match_dup 5)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 11) + (unspec:XF [(match_dup 9) (match_dup 5)] + UNSPEC_FSCALE_EXP))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 10)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<12; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (6); /* fldl2t */ + emit_move_insn (operands[3], temp); + emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "exp10df2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3))) + (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT)) + (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5))) + (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1)) + (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8))) + (parallel [(set (match_dup 10) + (unspec:XF [(match_dup 9) (match_dup 5)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 11) + (unspec:XF [(match_dup 9) (match_dup 5)] + UNSPEC_FSCALE_EXP))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 10)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<12; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (6); /* fldl2t */ + emit_move_insn (operands[3], temp); + emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "exp10xf2" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 2))) + (set 
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+                   (unspec:XF [(match_dup 8) (match_dup 4)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 9)
+                   (unspec:XF [(match_dup 8) (match_dup 4)]
+                              UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<10; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (6); /* fldl2t */
+  emit_move_insn (operands[2], temp);
+  emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp2sf2"
+  [(set (match_dup 2)
+        (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+   (set (match_dup 3) (unspec:XF [(match_dup 2)] UNSPEC_FRNDINT))
+   (set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
+   (set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
+   (parallel [(set (match_dup 8)
+                   (unspec:XF [(match_dup 7) (match_dup 3)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 9)
+                   (unspec:XF [(match_dup 7) (match_dup 3)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF (match_dup 8)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  for (i=2; i<10; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp2df2"
+  [(set (match_dup 2)
+        (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+   (set (match_dup 3) (unspec:XF [(match_dup 2)] UNSPEC_FRNDINT))
+   (set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
+   (set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
+   (parallel [(set (match_dup 8)
+                   (unspec:XF [(match_dup 7) (match_dup 3)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 9)
+                   (unspec:XF [(match_dup 7) (match_dup 3)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_operand:DF 0 "register_operand" "")
+        (float_truncate:DF (match_dup 8)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  for (i=2; i<10; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp2xf2"
+  [(set (match_dup 2) (match_operand:XF 1 "register_operand" ""))
+   (set (match_dup 3) (unspec:XF [(match_dup 2)] UNSPEC_FRNDINT))
+   (set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
+   (set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+                   (unspec:XF [(match_dup 7) (match_dup 3)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 8)
+                   (unspec:XF [(match_dup 7) (match_dup 3)]
+                              UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  for (i=2; i<9; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1df2"
+  [(set (match_dup 2)
+        (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+   (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+   (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+   (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 8)
+                   (unspec:XF [(match_dup 7) (match_dup 5)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 9)
+                   (unspec:XF [(match_dup 7) (match_dup 5)]
+                              UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 11)
+                   (unspec:XF [(match_dup 10) (match_dup 9)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 12)
+                   (unspec:XF [(match_dup 10) (match_dup 9)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+   (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+   (set (match_operand:DF 0 "register_operand" "")
+        (float_truncate:DF (match_dup 14)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<15; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[3], temp);
+  emit_move_insn (operands[10], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1sf2"
+  [(set (match_dup 2)
+        (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+   (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+   (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+   (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 8)
+                   (unspec:XF [(match_dup 7) (match_dup 5)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 9)
+                   (unspec:XF [(match_dup 7) (match_dup 5)]
+                              UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 11)
+                   (unspec:XF [(match_dup 10) (match_dup 9)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 12)
+                   (unspec:XF [(match_dup 10) (match_dup 9)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+   (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+   (set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF (match_dup 14)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<15; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[3], temp);
+  emit_move_insn (operands[10], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1xf2"
+  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+                               (match_dup 2)))
+   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 7)
+                   (unspec:XF [(match_dup 6) (match_dup 4)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 8)
+                   (unspec:XF [(match_dup 6) (match_dup 4)]
+                              UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 10)
+                   (unspec:XF [(match_dup 9) (match_dup 8)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 11)
+                   (unspec:XF [(match_dup 9) (match_dup 8)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
+   (set (match_operand:XF 0 "register_operand" "")
+        (plus:XF (match_dup 12) (match_dup 7)))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<13; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[2], temp);
+  emit_move_insn (operands[9], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "ldexpdf3"
+  [(set (match_dup 3)
+        (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+   (set (match_dup 4)
+        (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_dup 5)
+                   (unspec:XF [(match_dup 3) (match_dup 4)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 6)
+                   (unspec:XF [(match_dup 3) (match_dup 4)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_operand:DF 0 "register_operand" "")
+        (float_truncate:DF (match_dup 5)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  for (i=3; i<7; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexpsf3"
+  [(set (match_dup 3)
+        (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+   (set (match_dup 4)
+        (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_dup 5)
+                   (unspec:XF [(match_dup 3) (match_dup 4)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 6)
+                   (unspec:XF [(match_dup 3) (match_dup 4)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF (match_dup 5)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  for (i=3; i<7; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexpxf3"
+  [(set (match_dup 3)
+        (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+                   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+                               (match_dup 3)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 4)
+                   (unspec:XF [(match_dup 1) (match_dup 3)]
+                              UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  for (i=3; i<5; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
+
+
+(define_insn "frndintxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   UNSPEC_FRNDINT))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "frndint"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "rintdf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2 (op0, op1));
+
+  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "rintsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2 (op0, op1));
+
+  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "rintxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn_and_split "*fistdi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+                   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fistdi2 (operands[0], operands[1]));
+  else
+    {
+      operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+      emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+                                        operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+                   UNSPEC_FIST))
+   (clobber (match_scratch:XF 2 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+                   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+                   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+              (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+                   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+              (clobber (match_dup 3))])]
+  "")
+
+(define_insn_and_split "*fist<mode>2_1"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+                           UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+  emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+                                        operands[2]));
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+                           UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_with_temp"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+                           UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+                           UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
""))] + "reload_completed" + [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST))] + "") + +(define_expand "lrint<mode>2" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "") + +;; Rounding mode control word calculation could clobber FLAGS_REG. +(define_insn_and_split "frndintxf2_floor" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_FLOOR] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + + emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_floor_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) + +(define_expand "floorxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_floor (operands[0], operands[1])); + DONE; +}) + +(define_expand "floordf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_floor (op0, op1)); + + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "floorsf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_floor (op0, op1)); + + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn_and_split "*fist<mode>2_floor_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + 
"TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_FLOOR] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist<mode>2_floor (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); + emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "<MODE>")]) + +(define_insn "fistdi2_floor" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_floor_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fist<mode>2_floor" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "<MODE>")]) + +(define_insn "fist<mode>2_floor_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" 
"f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_expand "lfloor<mode>2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "") + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_ceil"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   UNSPEC_FRNDINT_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+  emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+                                       operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_ceil_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   UNSPEC_FRNDINT_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_expand "ceilxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "ceildf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "ceilsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_ceil_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+        (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+                         UNSPEC_FIST_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+                                     operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+                                                 operands[2], operands[3],
+                                                 operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_ceil"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+                   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_ceil_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+                   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+                   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+              (use (match_dup 2))
+              (use (match_dup 3))
+              (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+                   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+              (use (match_dup 2))
+              (use (match_dup 3))
+              (clobber (match_dup 5))])]
+  "")
+
+(define_insn "fist<mode>2_ceil"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+                           UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_ceil_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+                           UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+                           UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+                                                    UNSPEC_FIST_CEIL))
+              (use (match_dup 2))
+              (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+ "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_expand "lceil<mode>2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "") + +;; Rounding mode control word calculation could clobber FLAGS_REG. +(define_insn_and_split "frndintxf2_trunc" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_TRUNC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + + emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_trunc_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_TRUNC)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_expand "btruncxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_trunc (operands[0], operands[1])); + DONE; +}) + +(define_expand "btruncdf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_trunc (op0, op1)); + + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "btruncsf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_trunc (op0, op1)); + + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_mask_pm"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   UNSPEC_FRNDINT_MASK_PM))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_MASK_PM] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
+
+  emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
+                                          operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "mask_pm")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_mask_pm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   UNSPEC_FRNDINT_MASK_PM))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "mask_pm")
+   (set_attr "mode" "XF")])
+
+(define_expand "nearbyintxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
+
+  DONE;
+})
+
+(define_expand "nearbyintdf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "nearbyintsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+
+;; Block operation instructions
+
+(define_insn "cld"
+  [(set (reg:SI DIRFLAG_REG) (const_int 0))]
+  ""
+  "cld"
+  [(set_attr "type" "cld")])
+
+(define_expand "movmemsi"
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:BLK 1 "memory_operand" ""))
+   (use (match_operand:SI 2 "nonmemory_operand" ""))
+   (use (match_operand:SI 3 "const_int_operand" ""))]
+  "! optimize_size || TARGET_INLINE_ALL_STRINGOPS"
+{
+  if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
+(define_expand "movmemdi"
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:BLK 1 "memory_operand" ""))
+   (use (match_operand:DI 2 "nonmemory_operand" ""))
+   (use (match_operand:DI 3 "const_int_operand" ""))]
+  "TARGET_64BIT"
+{
+  if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
+;; Most CPUs don't like single string operations
+;; Handle this case here to simplify previous expander.
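+;; Explanatory note (editor's addition): with the direction flag cleared
+;; (the cld pattern above), each movs{b,w,l,q} copies one element from
+;; [si] to [di] and advances both pointers by the element size, which the
+;; *strmov patterns below model with an explicit (plus ...) on operands 0
+;; and 2.  The rep-prefixed forms later express the final pointers in
+;; terms of the count register instead; e.g. in *rep_movsi the
+;; destination ends at dst + 4*count, written as
+;;   (plus:SI (ashift:SI count (const_int 2)) dst)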
+
+(define_expand "strmov"
+  [(set (match_dup 4) (match_operand 3 "memory_operand" ""))
+   (set (match_operand 1 "memory_operand" "") (match_dup 4))
+   (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5))
+              (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
+
+  /* If .md ever supports :P for Pmode, these can be directly
+     in the pattern above.  */
+  operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+  operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
+
+  if (TARGET_SINGLE_STRINGOP || optimize_size)
+    {
+      emit_insn (gen_strmov_singleop (operands[0], operands[1],
+                                      operands[2], operands[3],
+                                      operands[5], operands[6]));
+      DONE;
+    }
+
+  operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
+})
+
+(define_expand "strmov_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand" "")
+                   (match_operand 3 "memory_operand" ""))
+              (set (match_operand 0 "register_operand" "")
+                   (match_operand 4 "" ""))
+              (set (match_operand 2 "register_operand" "")
+                   (match_operand 5 "" ""))
+              (use (reg:SI DIRFLAG_REG))])]
+  "TARGET_SINGLE_STRINGOP || optimize_size"
+  "")
+
+(define_insn "*strmovdi_rex_1"
+  [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+        (mem:DI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_dup 2)
+                 (const_int 8)))
+   (set (match_operand:DI 1 "register_operand" "=S")
+        (plus:DI (match_dup 3)
+                 (const_int 8)))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "movsq"
+  [(set_attr "type" "str")
+   (set_attr "mode" "DI")
+   (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_1"
+  [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
+        (mem:SI (match_operand:SI 3 "register_operand" "1")))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_dup 2)
+                 (const_int 4)))
+   (set (match_operand:SI 1 "register_operand" "=S")
+        (plus:SI (match_dup 3)
+                 (const_int 4)))
+   (use (reg:SI DIRFLAG_REG))]
+  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "{movsl|movsd}"
+  [(set_attr "type" "str")
+   (set_attr "mode" "SI")
+   (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_rex_1"
+  [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
+        (mem:SI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_dup 2)
+                 (const_int 4)))
+   (set (match_operand:DI 1 "register_operand" "=S")
+        (plus:DI (match_dup 3)
+                 (const_int 4)))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "{movsl|movsd}"
+  [(set_attr "type" "str")
+   (set_attr "mode" "SI")
+   (set_attr "memory" "both")])
+
+(define_insn "*strmovhi_1"
+  [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
+        (mem:HI (match_operand:SI 3 "register_operand" "1")))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_dup 2)
+                 (const_int 2)))
+   (set (match_operand:SI 1 "register_operand" "=S")
+        (plus:SI (match_dup 3)
+                 (const_int 2)))
+   (use (reg:SI DIRFLAG_REG))]
+  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "movsw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strmovhi_rex_1"
+  [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
+        (mem:HI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
"register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 2))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 2))) + (use (reg:SI DIRFLAG_REG))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsw" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "HI")]) + +(define_insn "*strmovqi_1" + [(set (mem:QI (match_operand:SI 2 "register_operand" "0")) + (mem:QI (match_operand:SI 3 "register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 2) + (const_int 1))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 1))) + (use (reg:SI DIRFLAG_REG))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_insn "*strmovqi_rex_1" + [(set (mem:QI (match_operand:DI 2 "register_operand" "0")) + (mem:QI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 1))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 1))) + (use (reg:SI DIRFLAG_REG))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_expand "rep_mov" + [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 5 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 6 "" "")) + (set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (use (match_dup 4)) + (use (reg:SI DIRFLAG_REG))])] + "" + "") + +(define_insn "*rep_movdi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (ashift:DI (match_dup 5) (const_int 3)) + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI DIRFLAG_REG))] + "TARGET_64BIT" + "{rep\;movsq|rep movsq}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "DI")]) + +(define_insn "*rep_movsi" + [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2") + (const_int 2)) + (match_operand:SI 3 "register_operand" "0"))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (ashift:SI (match_dup 5) (const_int 2)) + (match_operand:SI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI DIRFLAG_REG))] + "!TARGET_64BIT" + "{rep\;movsl|rep movsd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "*rep_movsi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 2)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI 
+        (plus:DI (ashift:DI (match_dup 5) (const_int 2))
+                 (match_operand:DI 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+        (mem:BLK (match_dup 4)))
+   (use (match_dup 5))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT"
+  "{rep\;movsl|rep movsd}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi"
+  [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_operand:SI 3 "register_operand" "0")
+                 (match_operand:SI 5 "register_operand" "2")))
+   (set (match_operand:SI 1 "register_operand" "=S")
+        (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5)))
+   (set (mem:BLK (match_dup 3))
+        (mem:BLK (match_dup 4)))
+   (use (match_dup 5))
+   (use (reg:SI DIRFLAG_REG))]
+  "!TARGET_64BIT"
+  "{rep\;movsb|rep movsb}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi_rex64"
+  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_operand:DI 3 "register_operand" "0")
+                 (match_operand:DI 5 "register_operand" "2")))
+   (set (match_operand:DI 1 "register_operand" "=S")
+        (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
+   (set (mem:BLK (match_dup 3))
+        (mem:BLK (match_dup 4)))
+   (use (match_dup 5))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT"
+  "{rep\;movsb|rep movsb}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_expand "setmemsi"
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:SI 1 "nonmemory_operand" ""))
+   (use (match_operand 2 "const_int_operand" ""))
+   (use (match_operand 3 "const_int_operand" ""))]
+  ""
+{
+  /* If value to set is not zero, use the library routine.  */
+  if (operands[2] != const0_rtx)
+    FAIL;
+
+  if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
+(define_expand "setmemdi"
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:DI 1 "nonmemory_operand" ""))
+   (use (match_operand 2 "const_int_operand" ""))
+   (use (match_operand 3 "const_int_operand" ""))]
+  "TARGET_64BIT"
+{
+  /* If value to set is not zero, use the library routine.  */
+  if (operands[2] != const0_rtx)
+    FAIL;
+
+  if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
+;; Most CPUs don't like single string operations
+;; Handle this case here to simplify previous expander.
+
+(define_expand "strset"
+  [(set (match_operand 1 "memory_operand" "")
+        (match_operand 2 "register_operand" ""))
+   (parallel [(set (match_operand 0 "register_operand" "")
+                   (match_dup 3))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+    operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
+
+  /* If .md ever supports :P for Pmode, this can be directly
+     in the pattern above.  */
+  operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+                              GEN_INT (GET_MODE_SIZE (GET_MODE
+                                                      (operands[2]))));
+  if (TARGET_SINGLE_STRINGOP || optimize_size)
+    {
+      emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+                                      operands[3]));
+      DONE;
+    }
+})
+
+(define_expand "strset_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand" "")
+                   (match_operand 2 "register_operand" ""))
+              (set (match_operand 0 "register_operand" "")
+                   (match_operand 3 "" ""))
+              (use (reg:SI DIRFLAG_REG))])]
+  "TARGET_SINGLE_STRINGOP || optimize_size"
+  "")
+
+(define_insn "*strsetdi_rex_1"
+  [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
+        (match_operand:DI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_dup 1)
+                 (const_int 8)))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "stosq"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+(define_insn "*strsetsi_1"
+  [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+        (match_operand:SI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_dup 1)
+                 (const_int 4)))
+   (use (reg:SI DIRFLAG_REG))]
+  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "{stosl|stosd}"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*strsetsi_rex_1"
+  [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
+        (match_operand:SI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_dup 1)
+                 (const_int 4)))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "{stosl|stosd}"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*strsethi_1"
+  [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+        (match_operand:HI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_dup 1)
+                 (const_int 2)))
+   (use (reg:SI DIRFLAG_REG))]
+  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "stosw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strsethi_rex_1"
+  [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
+        (match_operand:HI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_dup 1)
+                 (const_int 2)))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "stosw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strsetqi_1"
+  [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+        (match_operand:QI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_dup 1)
+                 (const_int 1)))
+   (use (reg:SI DIRFLAG_REG))]
+  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "stosb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_insn "*strsetqi_rex_1"
+  [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
+        (match_operand:QI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_dup 1)
+                 (const_int 1)))
+   (use (reg:SI DIRFLAG_REG))]
+  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "stosb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_expand "rep_stos"
+ [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "memory_operand" "") (const_int 0)) + (use (match_operand 3 "register_operand" "")) + (use (match_dup 1)) + (use (reg:SI DIRFLAG_REG))])] + "" + "") + +(define_insn "*rep_stosdi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:DI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI DIRFLAG_REG))] + "TARGET_64BIT" + "{rep\;stosq|rep stosq}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*rep_stossi" + [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1") + (const_int 2)) + (match_operand:SI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI DIRFLAG_REG))] + "!TARGET_64BIT" + "{rep\;stosl|rep stosd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*rep_stossi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") + (const_int 2)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI DIRFLAG_REG))] + "TARGET_64BIT" + "{rep\;stosl|rep stosd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*rep_stosqi" + [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI DIRFLAG_REG))] + "!TARGET_64BIT" + "{rep\;stosb|rep stosb}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "*rep_stosqi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_operand:DI 3 "register_operand" "0") + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI DIRFLAG_REG))] + "TARGET_64BIT" + "{rep\;stosb|rep stosb}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_expand "cmpstrnsi" + [(set (match_operand:SI 0 "register_operand" "") + (compare:SI (match_operand:BLK 1 "general_operand" "") + (match_operand:BLK 2 "general_operand" ""))) + (use (match_operand 3 "general_operand" "")) + (use (match_operand 4 "immediate_operand" ""))] + /* APPLE LOCAL x86_64 disable 
inline expansion for memcmp until 4436760 is fixed */ + "(! optimize_size || TARGET_INLINE_ALL_STRINGOPS) && !TARGET_64BIT" +{ + /* APPLE LOCAL begin 4134111 */ + rtx addr1, addr2, out, outlow, count, countreg, align, scratch; + + /* Can't use this if the user has appropriated esi or edi. */ + if (global_regs[4] || global_regs[5]) + FAIL; + + /* The Darwin expansion is unsafe on volatile objects. */ + if (TARGET_MACHO + && (MEM_VOLATILE_P (operands[1]) || MEM_VOLATILE_P (operands[2]))) + FAIL; + + /* APPLE LOCAL begin 4134510 */ + if (TARGET_MACHO) + { + count = operands[3]; + if (GET_CODE (count) != CONST_INT || INTVAL (count) > 30) + FAIL; + } + /* APPLE LOCAL end 4134510 */ + + if (TARGET_MACHO) + scratch = gen_reg_rtx (SImode); + + out = operands[0]; + if (GET_CODE (out) != REG) + out = gen_reg_rtx (SImode); + + addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); + if (addr1 != XEXP (operands[1], 0)) + operands[1] = replace_equiv_address_nv (operands[1], addr1); + if (addr2 != XEXP (operands[2], 0)) + operands[2] = replace_equiv_address_nv (operands[2], addr2); + + count = operands[3]; + countreg = ix86_zero_extend_to_Pmode (count); + + /* %%% Iff we are testing strict equality, we can use known alignment + to good advantage. This may be possible with combine, particularly + once cc0 is dead. */ + align = operands[4]; + + emit_insn (gen_cld ()); + if (GET_CODE (count) == CONST_INT) + { + if (INTVAL (count) == 0) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (!TARGET_MACHO) + emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); + else + emit_insn (gen_darwin_cmpstrnqi_nz_1 (out, addr1, addr2, countreg, align, + operands[1], operands[2], scratch)); + } + else + { + if (TARGET_64BIT) + emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); + else + emit_insn (gen_cmpsi_1 (countreg, countreg)); + if (!TARGET_MACHO) + emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); + else + emit_insn (gen_darwin_cmpstrqi_1 (out, addr1, addr2, countreg, align, + operands[1], operands[2], scratch)); + } + + if (!TARGET_MACHO) + { + outlow = gen_lowpart (QImode, out); + emit_insn (gen_cmpintqi (outlow)); + emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow)); + } + /* APPLE LOCAL end 4134111 */ + + if (operands[0] != out) + emit_move_insn (operands[0], out); + + DONE; +}) + +;; Produce a tri-state integer (-1, 0, 1) from condition codes. + +(define_expand "cmpintqi" + [(set (match_dup 1) + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_dup 2) + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (parallel [(set (match_operand:QI 0 "register_operand" "") + (minus:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" + "operands[1] = gen_reg_rtx (QImode); + operands[2] = gen_reg_rtx (QImode);") + +;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is +;; zero. Emit extra code to make sure that a zero-length compare is EQ. 
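(Editor's note: the cmpintqi expander above materializes the conventional memcmp-style result from the flags left by `repz cmpsb'. A minimal C sketch of the same -1/0/1 computation; `tristate' is a hypothetical name, not anything defined in GCC.)

    #include <stdio.h>

    /* (a > b) and (a < b) play the roles of the gtu:QI and ltu:QI
       sets in cmpintqi; their difference is the tri-state result.  */
    static int tristate (unsigned a, unsigned b)
    {
      return (a > b) - (a < b);
    }

    int main (void)
    {
      printf ("%d %d %d\n", tristate (1, 2), tristate (2, 2), tristate (3, 2));
      return 0;   /* prints: -1 0 1 */
    }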
+ +(define_expand "cmpstrnqi_nz_1" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" ""))) + (use (match_operand 2 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "") + +(define_insn "*cmpstrnqi_nz_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) + (mem:BLK (match_operand:SI 5 "register_operand" "1")))) + (use (match_operand:SI 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:SI 0 "register_operand" "=S")) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (match_operand:SI 2 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "*cmpstrnqi_nz_rex_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) + (mem:BLK (match_operand:DI 5 "register_operand" "1")))) + (use (match_operand:DI 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:DI 0 "register_operand" "=S")) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (match_operand:DI 2 "register_operand" "=c"))] + "TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; The same, but the count is not known to not be zero. + +(define_expand "cmpstrnqi_1" + [(parallel [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 2 "register_operand" "") + (const_int 0)) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" "")) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "") + +(define_insn "*cmpstrnqi_1" + [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) + (mem:BLK (match_operand:SI 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC FLAGS_REG)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:SI 0 "register_operand" "=S")) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (match_operand:SI 2 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "*cmpstrnqi_rex_1" + [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) + (mem:BLK (match_operand:DI 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC FLAGS_REG)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:DI 0 "register_operand" "=S")) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (match_operand:DI 2 "register_operand" "=c"))] + "TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr 
"type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; APPLE LOCAL begin 4134111 +;; Darwin's memcmp returns the difference of the last 2 bytes compared, +;; not -1/0/1. Unfortunately we must reload those bytes to get the +;; result, as they aren't sitting around anywhere. This is still +;; faster than calling libc though. + +(define_expand "darwin_cmpstrnqi_nz_1" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (compare:CC (match_operand 5 "memory_operand" "") + (match_operand 6 "memory_operand" "")) 0)) + (use (match_operand 3 "register_operand" "")) + (use (match_operand:SI 4 "immediate_operand" "")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" "")) + (clobber (match_operand 7 "register_operand" "")) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_dup 3))])] + "" + "") + +(define_insn "*darwin_cmpstrnqi_nz_1" +;; APPLE LOCAL 5379188 + [(set (match_operand:SI 0 "register_operand" "=&r") + (subreg:SI (compare:CC (mem:BLK (match_operand:SI 5 "register_operand" "1")) + (mem:BLK (match_operand:SI 6 "register_operand" "2"))) 0)) + (use (match_operand:SI 8 "register_operand" "3")) + (use (match_operand:SI 4 "immediate_operand" "i")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:SI 1 "register_operand" "=S")) + (clobber (match_operand:SI 2 "register_operand" "=D")) + (clobber (match_operand:SI 7 "register_operand" "=r")) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 3 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz{\;| }cmpsb\n\tmov\t$0, %0\n\tje\t0f\n\tmovzbl\t-1(%5), %0\n\tmovzbl\t-1(%6), %8\n\tsubl\t%8,%0\n0:" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_expand "darwin_cmpstrqi_1" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (if_then_else:CC (ne (match_operand 3 "register_operand" "") + (const_int 0)) + (compare:CC (match_operand 5 "memory_operand" "") + (match_operand 6 "memory_operand" "")) + (const_int 0)) 0)) + (use (match_operand:SI 4 "immediate_operand" "")) + (use (reg:SI DIRFLAG_REG)) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" "")) + (clobber (match_operand 7 "register_operand" "")) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_dup 3))])] + "" + "") + +(define_insn "*darwin_cmpstrqi_1" +;; APPLE LOCAL 5379188 + [(set (match_operand:SI 0 "register_operand" "=&r") + (subreg:SI (if_then_else:CC (ne (match_operand:SI 8 "register_operand" "3") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:SI 5 "register_operand" "1")) + (mem:BLK (match_operand:SI 6 "register_operand" "2"))) + (const_int 0)) 0)) + (use (match_operand:SI 4 "immediate_operand" "i")) + (use (reg:SI DIRFLAG_REG)) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 1 "register_operand" "=S")) + (clobber (match_operand:SI 2 "register_operand" "=D")) + (clobber (match_operand:SI 7 "register_operand" "=r")) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 3 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz{\;| }cmpsb\n\tmov\t$0, %0\n\tje\t0f\n\tmovzbl\t-1(%5), %0\n\tmovzbl\t-1(%6), %8\n\tsubl\t%8,%0\n0:" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; APPLE LOCAL end 4134111 + +(define_expand "strlensi" + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 
"immediate_operand" "") + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] + "" +{ + if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "strlendi" + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] + "" +{ + if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "strlenqi_1" + [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" "")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand 1 "register_operand" "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*strlenqi_1" + [(set (match_operand:SI 0 "register_operand" "=&c") + (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "repnz{\;| }scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "*strlenqi_rex_1" + [(set (match_operand:DI 0 "register_operand" "=&c") + (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:DI 3 "immediate_operand" "i") + (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "repnz{\;| }scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; Peephole optimizations to clean up after cmpstrn*. This should be +;; handled in combine, but it is not currently up to the task. +;; When used for their truth value, the cmpstrn* expanders generate +;; code like this: +;; +;; repz cmpsb +;; seta %al +;; setb %dl +;; cmpb %al, %dl +;; jcc label +;; +;; The intermediate three instructions are unnecessary. + +;; This one handles cmpstrn*_nz_1... +(define_peephole2 + [(parallel[ + (set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" "")))) + (use (match_operand 6 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5)))) + (use (match_dup 6)) + (use (match_dup 3)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] + "") + +;; ...and this one handles cmpstrn*_1. 
+(define_peephole2 + [(parallel[ + (set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 6 "register_operand" "") + (const_int 0)) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" ""))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_dup 6) + (const_int 0)) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5))) + (const_int 0))) + (use (match_dup 3)) + (use (reg:CC FLAGS_REG)) + (use (reg:SI DIRFLAG_REG)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] + "") + + + +;; Conditional move instructions. + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "general_operand" "") + (match_operand:DI 3 "general_operand" "")))] + "TARGET_64BIT" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +(define_insn "x86_movdicc_0_m1_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "sbb{q}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movdicc_c_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DI 2 "nonimmediate_operand" "rm,0") + (match_operand:DI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_64BIT && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "DI")]) + +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "general_operand" "") + (match_operand:SI 3 "general_operand" "")))] + "" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing +;; the register first winds up with `sbbl $0,reg', which is also weird. +;; So just document what we're doing explicitly. + +(define_insn "x86_movsicc_0_m1" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{l}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. 
+ [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsicc_noc" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 2 "nonimmediate_operand" "rm,0") + (match_operand:SI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + +(define_expand "movhicc" + [(set (match_operand:HI 0 "register_operand" "") + (if_then_else:HI (match_operand 1 "comparison_operator" "") + (match_operand:HI 2 "general_operand" "") + (match_operand:HI 3 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +(define_insn "*movhicc_noc" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (if_then_else:HI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:HI 2 "nonimmediate_operand" "rm,0") + (match_operand:HI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "HI")]) + +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "general_operand" "") + (match_operand:QI 3 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +(define_insn_and_split "*movqicc_noc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI (match_operator 1 "ix86_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:QI 2 "register_operand" "r,0") + (match_operand:QI 3 "register_operand" "0,r")))] + "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 2) + (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" + "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") + +(define_insn "*movsfcc_1_387" + [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_80387 && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "SF,SF,SI,SI")]) + +(define_expand "movdfcc" + [(set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (match_operand 1 "comparison_operator" "") + (match_operand:DF 2 "register_operand" "") + (match_operand:DF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" + "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") + +(define_insn "*movdfcc_1" + [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + # + #" + [(set_attr "type" "fcmov,fcmov,multi,multi") + (set_attr "mode" "DF")]) + +(define_insn "*movdfcc_1_rex64" + [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "DF")]) + +(define_split + [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "") + (match_operand:DF 3 "nonimmediate_operand" "")))] + "!TARGET_64BIT && reload_completed" + [(set (match_dup 2) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 5) + (match_dup 7))) + (set (match_dup 3) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 6) + (match_dup 8)))] + "split_di (operands+2, 1, operands+5, operands+6); + split_di (operands+3, 1, operands+7, operands+8); + split_di (operands, 1, operands+2, operands+3);") + +(define_expand "movxfcc" + [(set (match_operand:XF 0 "register_operand" "") + (if_then_else:XF (match_operand 1 "comparison_operator" "") + (match_operand:XF 2 "register_operand" "") + (match_operand:XF 3 "register_operand" "")))] + "TARGET_80387 && TARGET_CMOVE" + "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") + +(define_insn "*movxfcc_1" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:XF 2 "register_operand" "f,0") + (match_operand:XF 3 "register_operand" "0,f")))] + "TARGET_80387 && TARGET_CMOVE" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). +;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "sminsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "smaxsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "smindf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "smaxdf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. 
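(Editor's note: a C sketch of the exact operation the IEEE-safe patterns below implement, and of why their operands cannot be commuted; `sse_min' is a hypothetical name. minss/minsd return the second operand whenever the comparison is unordered.)

    #include <math.h>
    #include <stdio.h>

    /* op1 < op2 ? op1 : op2 -- the comparison is false for NaN inputs,
       so the second operand always wins on unordered compares.  */
    static double sse_min (double op1, double op2)
    {
      return op1 < op2 ? op1 : op2;
    }

    int main (void)
    {
      double qnan = nan ("");
      printf ("%g\n", sse_min (1.0, qnan));   /* nan: op2 wins */
      printf ("%g\n", sse_min (qnan, 1.0));   /* 1:   op2 wins */
      return 0;
    }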
+ +(define_insn "*ieee_sminsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE_MATH" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "*ieee_smaxsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE_MATH" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "*ieee_smindf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "*ieee_smaxdf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +;; Make two stack loads independent: +;; fld aa fld aa +;; fld %st(0) -> fld bb +;; fmul bb fmul %st(1), %st +;; +;; Actually we only match the last two instructions for simplicity. +(define_peephole2 + [(set (match_operand 0 "fp_register_operand" "") + (match_operand 1 "fp_register_operand" "")) + (set (match_dup 0) + (match_operator 2 "binary_fp_operator" + [(match_dup 0) + (match_operand 3 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (match_dup 4))] + + ;; The % modifier is not operational anymore in peephole2's, so we have to + ;; swap the operands manually in the case of addition and multiplication. + "if (COMMUTATIVE_ARITH_P (operands[2])) + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[0], operands[1]); + else + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[1], operands[0]);") + +;; Conditional addition patterns +(define_expand "addqicc" + [(match_operand:QI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "const_int_operand" "")] + "" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + +(define_expand "addhicc" + [(match_operand:HI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:HI 2 "register_operand" "") + (match_operand:HI 3 "const_int_operand" "")] + "" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + +(define_expand "addsicc" + [(match_operand:SI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + +(define_expand "adddicc" + [(match_operand:DI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "register_operand" "") + (match_operand:DI 3 "const_int_operand" "")] + "TARGET_64BIT" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + + +;; Misc patterns (?) + +;; This pattern exists to put a dependency on all ebp-based memory accesses. 
+;; Otherwise there will be nothing to keep +;; +;; [(set (reg ebp) (reg esp))] +;; [(set (reg esp) (plus (reg esp) (const_int -160000))) +;; (clobber (eflags)] +;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] +;; +;; in proper program order. +(define_insn "pro_epilogue_adjust_stack_1" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "register_operand" "0,r") + (match_operand:SI 2 "immediate_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + return "mov{l}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %0|%0, %a2}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0") + (const_string "alu") + (match_operand:SI 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set_attr "mode" "SI")]) + +(define_insn "pro_epilogue_adjust_stack_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 2 "x86_64_immediate_operand" "e,e"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + return "mov{q}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0") + (const_string "alu") + (match_operand:DI 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set_attr "mode" "DI")]) + +(define_insn "pro_epilogue_adjust_stack_rex64_2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 3 "immediate_operand" "i,i"))) + (use (match_operand:DI 2 "register_operand" "r,r")) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + return "add{q}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,lea") + (set_attr "mode" "DI")]) + +(define_expand "allocate_stack_worker" + [(match_operand:SI 0 "register_operand" "")] + "TARGET_STACK_PROBE" +{ + if (reload_completed) + { + if (TARGET_64BIT) + emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0])); + else + emit_insn (gen_allocate_stack_worker_postreload (operands[0])); + } + else + { + if (TARGET_64BIT) + emit_insn (gen_allocate_stack_worker_rex64 (operands[0])); + else + emit_insn (gen_allocate_stack_worker_1 
(operands[0])); + } + DONE; +}) + +(define_insn "allocate_stack_worker_1" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) + (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) + (clobber (match_scratch:SI 1 "=0")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_STACK_PROBE" + "call\t__alloca" + [(set_attr "type" "multi") + (set_attr "length" "5")]) + +(define_expand "allocate_stack_worker_postreload" + [(parallel [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) + (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) + (clobber (match_dup 0)) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "allocate_stack_worker_rex64" + [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) + (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0))) + (clobber (match_scratch:DI 1 "=0")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_STACK_PROBE" + "call\t__alloca" + [(set_attr "type" "multi") + (set_attr "length" "5")]) + +(define_expand "allocate_stack_worker_rex64_postreload" + [(parallel [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) + (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0))) + (clobber (match_dup 0)) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_expand "allocate_stack" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (reg:SI SP_REG) + (match_operand:SI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (reg:SI SP_REG) + (minus:SI (reg:SI SP_REG) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_STACK_PROBE" +{ +#ifdef CHECK_STACK_LIMIT + if (GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) < CHECK_STACK_LIMIT) + emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, + operands[1])); + else +#endif + emit_insn (gen_allocate_stack_worker (copy_to_mode_reg (SImode, + operands[1]))); + + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; +}) + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "!TARGET_64BIT && flag_pic" +{ + if (TARGET_MACHO) + { + rtx xops[3]; + rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + rtx label_rtx = gen_label_rtx (); + emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx)); + xops[0] = xops[1] = picreg; + xops[2] = gen_rtx_CONST (SImode, + gen_rtx_MINUS (SImode, + gen_rtx_LABEL_REF (SImode, label_rtx), + gen_rtx_SYMBOL_REF (SImode, GOT_SYMBOL_NAME))); + ix86_expand_binary_operator (MINUS, SImode, xops); + } + else + emit_insn (gen_set_got (pic_offset_table_rtx)); + DONE; +}) + +;; Avoid redundant prefixes by splitting HImode arithmetic to SImode. + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "promotable_binary_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "aligned_operand" "")])) + (clobber (reg:CC FLAGS_REG))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ((GET_MODE (operands[0]) == HImode + && ((!optimize_size && !TARGET_FAST_PREFIX) + /* ??? next two lines just !satisfies_constraint_K (...) 
*/ + || GET_CODE (operands[2]) != CONST_INT + || satisfies_constraint_K (operands[2]))) + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + if (GET_CODE (operands[3]) != ASHIFT) + operands[2] = gen_lowpart (SImode, operands[2]); + PUT_MODE (operands[3], SImode);") + +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(and (match_operand 3 "aligned_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)])) + (set (match_operand 1 "register_operand" "") + (and (match_dup 3) (match_dup 4)))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode) + && ! optimize_size + && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX) + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) + (const_int 0)])) + (set (match_dup 1) + (and:SI (match_dup 3) (match_dup 4)))])] +{ + operands[4] + = gen_int_mode (INTVAL (operands[4]) + & GET_MODE_MASK (GET_MODE (operands[1])), SImode); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[3] = gen_lowpart (SImode, operands[3]); +}) + +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand:HI 2 "aligned_operand" "") + (match_operand:HI 3 "const_int_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode) + && ! TARGET_FAST_PREFIX + && ! optimize_size" + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)]))] +{ + operands[3] + = gen_int_mode (INTVAL (operands[3]) + & GET_MODE_MASK (GET_MODE (operands[2])), SImode); + operands[2] = gen_lowpart (SImode, operands[2]); +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (neg (match_operand 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(parallel [(set (match_dup 0) + (neg:SI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (not (match_operand 1 "register_operand" "")))] + "! 
TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(set (match_dup 0) + (not:SI (match_dup 1)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand 2 "register_operand" "") + (match_operand 3 "register_operand" "")))] + "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);") + + +;; RTL Peephole optimizations, run before sched2. These primarily look to +;; transform a complex memory operation into two memory to register operations. + +;; Don't push memory operands +(define_peephole2 + [(set (match_operand:SI 0 "push_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (match_scratch:SI 2 "r")] + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "memory_operand" "")) + (match_scratch:DI 2 "r")] + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; We need to handle SFmode only, because DFmode and XFmode is split to +;; SImode pushes. +(define_peephole2 + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (match_scratch:SF 2 "r")] + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:HI 0 "push_operand" "") + (match_operand:HI 1 "memory_operand" "")) + (match_scratch:HI 2 "r")] + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:QI 0 "push_operand" "") + (match_operand:QI 1 "memory_operand" "")) + (match_scratch:QI 2 "q")] + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; Don't move an immediate directly to memory when the instruction +;; gets too big. +(define_peephole2 + [(match_scratch:SI 1 "r") + (set (match_operand:SI 0 "memory_operand" "") + (const_int 0))] + "! optimize_size + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 1) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "") + +(define_peephole2 + [(match_scratch:HI 1 "r") + (set (match_operand:HI 0 "memory_operand" "") + (const_int 0))] + "! optimize_size + && ! 
TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_lowpart (SImode, operands[1]);") + +(define_peephole2 + [(match_scratch:QI 1 "q") + (set (match_operand:QI 0 "memory_operand" "") + (const_int 0))] + "! optimize_size + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_lowpart (SImode, operands[1]);") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (set (match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "! optimize_size + && get_attr_length (insn) >= ix86_cost->large_insn + && TARGET_SPLIT_LONG_MOVES" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:HI 2 "r") + (set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "immediate_operand" ""))] + "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn + && TARGET_SPLIT_LONG_MOVES" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:QI 2 "q") + (set (match_operand:QI 0 "memory_operand" "") + (match_operand:QI 1 "immediate_operand" ""))] + "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn + && TARGET_SPLIT_LONG_MOVES" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; Don't compare memory with zero, load and use a test instead. +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand:SI 2 "memory_operand" "") + (const_int 0)])) + (match_scratch:SI 3 "r")] + "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] + "") + +;; NOT is not pairable on Pentium, while XOR is, but one byte longer. +;; Don't split NOTs with a displacement operand, because resulting XOR +;; will not be pairable anyway. +;; +;; On AMD K6, NOT is vector decoded with memory operand that cannot be +;; represented using a modRM byte. The XOR replacement is long decoded, +;; so this split helps here as well. +;; +;; Note: Can't do this as a regular split because we can't get proper +;; lifetime information then. 
+ +(define_peephole2 + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + "!optimize_size + && peep2_regno_dead_p (0, FLAGS_REG) + && ((TARGET_PENTIUM + && (GET_CODE (operands[0]) != MEM + || !memory_displacement_operand (operands[0], SImode))) + || (TARGET_K6 && long_memory_operand (operands[0], SImode)))" + [(parallel [(set (match_dup 0) + (xor:SI (match_dup 1) (const_int -1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_peephole2 + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + "!optimize_size + && peep2_regno_dead_p (0, FLAGS_REG) + && ((TARGET_PENTIUM + && (GET_CODE (operands[0]) != MEM + || !memory_displacement_operand (operands[0], HImode))) + || (TARGET_K6 && long_memory_operand (operands[0], HImode)))" + [(parallel [(set (match_dup 0) + (xor:HI (match_dup 1) (const_int -1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_peephole2 + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] + "!optimize_size + && peep2_regno_dead_p (0, FLAGS_REG) + && ((TARGET_PENTIUM + && (GET_CODE (operands[0]) != MEM + || !memory_displacement_operand (operands[0], QImode))) + || (TARGET_K6 && long_memory_operand (operands[0], QImode)))" + [(parallel [(set (match_dup 0) + (xor:QI (match_dup 1) (const_int -1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; Non pairable "test imm, reg" instructions can be translated to +;; "and imm, reg" if reg dies. The "and" form is also shorter (one +;; byte opcode instead of two, have a short form for byte operands), +;; so do it for other CPUs as well. Given that the value was dead, +;; this should not create any new dependencies. Pass on the sub-word +;; versions if we're concerned about partial register stalls. + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")) + (const_int 0)]))] + "ix86_match_ccmode (insn, CCNOmode) + && (true_regnum (operands[2]) != 0 + || satisfies_constraint_K (operands[3])) + && peep2_reg_dead_p (1, operands[2])" + [(parallel + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:SI (match_dup 2) (match_dup 3)))])] + "") + +;; We don't need to handle HImode case, because it will be promoted to SImode +;; on ! TARGET_PARTIAL_REG_STALL + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:QI (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "immediate_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[2]) != 0 + && peep2_reg_dead_p (1, operands[2])" + [(parallel + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:QI (match_dup 2) (match_dup 3)))])] + "") + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI + (zero_extract:SI + (match_operand 2 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "! 
TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[2]) != 0 + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 + [(and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)) + (const_int 0)])) + (set (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)))])] + "") + +;; Don't do logical operations with memory inputs. +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "! optimize_size && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "memory_operand" "") + (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "! optimize_size && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2) (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "") + +; Don't do logical operations with memory outputs +; +; These two don't make sense for PPro/PII -- we're expanding a 4-uop +; instruction into two 1-uop insns plus a 2-uop insn. That last has +; the same decoder scheduling characteristics as the original. + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "nonmemory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "! optimize_size && ! TARGET_READ_MODIFY_WRITE" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 2) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "nonmemory_operand" "") + (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "! optimize_size && ! TARGET_READ_MODIFY_WRITE" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 2))] + "") + +;; Attempt to always use XOR for zeroing registers. +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "const0_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (! TARGET_USE_MOV0 || optimize_size) + && GENERAL_REG_P (operands[0]) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[0] = gen_lowpart (word_mode, operands[0]); +}) + +(define_peephole2 + [(set (strict_low_part (match_operand 0 "register_operand" "")) + (const_int 0))] + "(GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode) + && (! 
TARGET_USE_MOV0 || optimize_size) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])]) + +;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg. +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (const_int -1))] + "(GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == SImode + || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) + && (optimize_size || TARGET_PENTIUM) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int -1)) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode, + operands[0]);") + +;; Attempt to convert simple leas to adds. These can be created by +;; move expanders. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_dup 0) + (match_operand:SI 1 "nonmemory_operand" "")))] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")) 0))] + "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])" + [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = gen_lowpart (SImode, operands[2]);") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_dup 0) + (match_operand:DI 1 "x86_64_general_operand" "")))] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" "")))] + "exact_log2 (INTVAL (operands[1])) >= 0 + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_dup 0) + (match_operand:DI 1 "const_int_operand" "")))] + "exact_log2 (INTVAL (operands[1])) >= 0 + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" "")) 0))] + "exact_log2 (INTVAL (operands[2])) >= 0 + && REGNO (operands[0]) == REGNO (operands[1]) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));") + +;; The ESP adjustments can be done by the push and pop instructions. Resulting +;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes. On +;; many CPUs it is also faster, since special hardware to avoid esp +;; dependencies is present. + +;; While some of these conversions may be done using splitters, we use peepholes +;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL. 
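(Editor's note on the size claim above, taken from the standard IA-32 encodings rather than from this file: `push %reg' is 50+r and `pop %reg' is 58+r, one byte each, while `add $imm8, %esp' is 83 C4 ib, three bytes. A toy model of the net stack-pointer effect the peepholes must preserve:)

    #include <assert.h>

    int main (void)
    {
      int esp = 1000;

      esp -= 4;   /* sub $4, %esp  ~  push %reg (which also stores) */
      esp += 4;   /* add $4, %esp  ~  pop  %reg (which also loads)  */
      assert (esp == 1000);   /* identical net adjustment */
      return 0;
    }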
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we
+;; have two choices:
+;; - use a scratch register and clobber it in order to avoid dependencies
+;; - use an already-live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most
+;; likely result in fewer dependencies.  At the site of the esp adjustments
+;; it is very likely that call-clobbered registers are dead.  We may want to
+;; use the base pointer as an alternative when no register is available later.
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_size || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_size || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_size || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_size || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_size || !TARGET_ADD_ESP_4"
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (mem:BLK (scratch)))])]
+  "")
+
+;; Two pops case is tricky, since pop causes dependency on destination register.
+;; We use two registers if available.
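+;; For instance (scratch registers chosen purely for illustration):
+;;	addl	$8, %esp
+;; becomes
+;;	popl	%ecx
+;;	popl	%edx
+;; so that the second pop does not stall on the first one's destination.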
+(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +;; Convert esp additions to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. +(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +;; Convert compares with 1 to shorter inc/dec operations when CF is not +;; required and register dies. Similarly for 128 to plus -128. 
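+;; For instance, when %eax dies afterwards and no consumer needs the carry
+;; flag:
+;;	cmpl	$1, %eax	becomes		decl	%eax
+;;	cmpl	$128, %eax	becomes		addl	$-128, %eax
+;; both of which have shorter encodings than the original compares.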
+(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")]))] + "(INTVAL (operands[3]) == -1 + || INTVAL (operands[3]) == 1 + || INTVAL (operands[3]) == 128) + && ix86_match_ccmode (insn, CCGCmode) + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 [(match_dup 2) (match_dup 3)])) + (clobber (match_dup 2))])] + "") + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (clobber (mem:BLK (scratch)))])]) + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (clobber (mem:BLK (scratch)))])]) + +;; Convert esp subtractions to push. +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) + +;; Convert epilogue deallocator to pop. +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
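+;; For instance, the 64-bit analogue (scratch registers illustrative only):
+;;	addq	$16, %rsp
+;; becomes
+;;	popq	%rcx
+;;	popq	%rdx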
+(define_peephole2 + [(match_scratch:DI 0 "r") + (match_scratch:DI 1 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +;; Convert esp additions to pop. +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. +(define_peephole2 + [(match_scratch:DI 0 "r") + (match_scratch:DI 1 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) + (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_size" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) + (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +;; Convert imul by three, five and nine into lea +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9" + [(set (match_dup 0) + (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 1)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "!optimize_size + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 0) (match_dup 2)) + (match_dup 0)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + +(define_peephole2 + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + 
(match_operand:DI 2 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT
+   && (INTVAL (operands[2]) == 3
+       || INTVAL (operands[2]) == 5
+       || INTVAL (operands[2]) == 9)"
+  [(set (match_dup 0)
+	(plus:DI (mult:DI (match_dup 1) (match_dup 2))
+		 (match_dup 1)))]
+  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+  [(parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+	  (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		   (match_operand:DI 2 "const_int_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT
+   && !optimize_size
+   && (INTVAL (operands[2]) == 3
+       || INTVAL (operands[2]) == 5
+       || INTVAL (operands[2]) == 9)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0)
+	(plus:DI (mult:DI (match_dup 0) (match_dup 2))
+		 (match_dup 0)))]
+  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+;; Imul $32bit_imm, mem, reg is vector decoded, while
+;; imul $32bit_imm, reg, reg is direct decoded.
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (mult:DI (match_operand:DI 1 "memory_operand" "")
+			    (match_operand:DI 2 "immediate_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (mult:SI (match_operand:SI 1 "memory_operand" "")
+			    (match_operand:SI 2 "immediate_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (zero_extend:DI
+		     (mult:SI (match_operand:SI 1 "memory_operand" "")
+			      (match_operand:SI 2 "immediate_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+	      (clobber (reg:CC FLAGS_REG))])]
+"")
+
+;; imul $8/16bit_imm, regmem, reg is vector decoded.
+;; Convert it into imul reg, reg.
+;; It would be better to force the assembler to encode the instruction using
+;; a long immediate, but there is apparently no way to do so.
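+;; For instance (scratch register chosen purely for illustration):
+;;	imull	$51, %eax, %eax
+;; becomes
+;;	movl	$51, %edx
+;;	imull	%edx, %eax
+;; trading a few bytes of code size for direct decoding, hence the
+;; !optimize_size test.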
+(define_peephole2 + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:DI 3 "r")] + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:SI 3 "r")] + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +(define_peephole2 + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:HI 3 "r")] + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +;; After splitting up read-modify operations, array accesses with memory +;; operands might end up in form: +;; sall $2, %eax +;; movl 4(%esp), %edx +;; addl %edx, %eax +;; instead of pre-splitting: +;; sall $2, %eax +;; addl 4(%esp), %eax +;; Turn it into: +;; movl 4(%esp), %edx +;; leal (%edx,%eax,4), %eax + +(define_peephole2 + [(parallel [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand 3 "register_operand") + (match_operand 4 "x86_64_general_operand" "")) + (parallel [(set (match_operand 5 "register_operand" "") + (plus (match_operand 6 "register_operand" "") + (match_operand 7 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3 + /* Validate MODE for lea. */ + && ((!TARGET_PARTIAL_REG_STALL + && (GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode)) + || GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + /* We reorder load and the shift. */ + && !rtx_equal_p (operands[1], operands[3]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + /* Last PLUS must consist of operand 0 and 3. */ + && !rtx_equal_p (operands[0], operands[3]) + && (rtx_equal_p (operands[3], operands[6]) + || rtx_equal_p (operands[3], operands[7])) + && (rtx_equal_p (operands[0], operands[6]) + || rtx_equal_p (operands[0], operands[7])) + /* The intermediate operand 0 must die or be same as output. */ + && (rtx_equal_p (operands[0], operands[5]) + || peep2_reg_dead_p (3, operands[0]))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (match_dup 1))] +{ + enum machine_mode mode = GET_MODE (operands[5]) == DImode ? 
DImode : SImode; + int scale = 1 << INTVAL (operands[2]); + rtx index = gen_lowpart (Pmode, operands[1]); + rtx base = gen_lowpart (Pmode, operands[3]); + rtx dest = gen_lowpart (mode, operands[5]); + + operands[1] = gen_rtx_PLUS (Pmode, base, + gen_rtx_MULT (Pmode, index, GEN_INT (scale))); + if (mode != Pmode) + operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + operands[0] = dest; +}) + +;; Call-value patterns last so that the wildcard operand does not +;; disrupt insn-recog's switch tables. + +(define_insn "*call_value_pop_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))] + "!TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], Pmode)) + { + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; + } + if (SIBLING_CALL_P (insn)) + return "jmp\t%A1"; + else + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" "")))] + "TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm")) + (match_operand:SI 2 "" "")))] + "!SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a")) + (match_operand:SI 2 "" "")))] + "SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "jmp\t%P1"; + return "jmp\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" "")))] + "!SIBLING_CALL_P (insn) && TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "" "")))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t%P1" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64_v" + [(set (match_operand 0 "" "") + (call (mem:QI (reg:DI 40)) + (match_operand:DI 1 "" "")))] + "SIBLING_CALL_P (insn) && 
TARGET_64BIT" + "jmp\t*%%r11" + [(set_attr "type" "callv")]) + +;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. +;; That, however, is usually mapped by the OS to SIGSEGV, which is often +;; caught for use by garbage collectors and the like. Using an insn that +;; maps to SIGILL makes it more likely the program will rightfully die. +;; Keeping with tradition, "6" is in honor of #UD. +(define_insn "trap" + [(trap_if (const_int 1) (const_int 6))] + "" + { return ASM_SHORT "0x0b0f"; } + [(set_attr "length" "2")]) + +(define_expand "sse_prologue_save" + [(parallel [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (label_ref:DI (match_operand 3 "" "")))])] + "TARGET_64BIT" + "") + +(define_insn "*sse_prologue_save_insn" + [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") + (match_operand:DI 4 "const_int_operand" "n"))) + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "r")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (label_ref:DI (match_operand 3 "" "X")))] + "TARGET_64BIT + && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 + && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" + "* +{ + int i; + operands[0] = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, operands[0], operands[4])); + output_asm_insn (\"jmp\\t%A1\", operands); + for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) + { + operands[4] = adjust_address (operands[0], DImode, i*16); + operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); + PUT_MODE (operands[4], TImode); + if (GET_CODE (XEXP (operands[0], 0)) != PLUS) + output_asm_insn (\"rex\", operands); + output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); + } + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (operands[3])); + RET; +} + " + [(set_attr "type" "other") + (set_attr "length_immediate" "0") + (set_attr "length_address" "0") + (set_attr "length" "135") + (set_attr "memory" "store") + (set_attr "modrm" "0") + (set_attr "mode" "DI")]) + +(define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" +{ + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + + gcc_assert (rw == 0 || rw == 1); + gcc_assert (locality >= 0 && locality <= 3); + gcc_assert (GET_MODE (operands[0]) == Pmode + || GET_MODE (operands[0]) == VOIDmode); + + /* Use 3dNOW prefetch in case we are asking for write prefetch not + supported by SSE counterpart or the SSE prefetch is not available + (K6 machines). Otherwise use SSE prefetch as it allows specifying + of locality. 
*/ + if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + operands[2] = GEN_INT (3); + else + operands[1] = const0_rtx; +}) + +(define_insn "*prefetch_sse" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && !TARGET_64BIT" +{ + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; + + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); + + return patterns[locality]; +} + [(set_attr "type" "sse") + (set_attr "memory" "none")]) + +(define_insn "*prefetch_sse_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && TARGET_64BIT" +{ + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; + + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); + + return patterns[locality]; +} + [(set_attr "type" "sse") + (set_attr "memory" "none")]) + +(define_insn "*prefetch_3dnow" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && !TARGET_64BIT" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +(define_insn "*prefetch_3dnow_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && TARGET_64BIT" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")] + "" +{ +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_set_di (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_set_si (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_set_di (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); +#endif + DONE; +}) + +(define_insn "stack_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) + +(define_insn "stack_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_di" + [(set 
(match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + else + return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + } + [(set_attr "type" "multi")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + ix86_compare_emitted = flags; + +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_test_di (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_test_si (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1])); + else + emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1])); +#endif + emit_jump_insn (gen_beq (operands[2])); + DONE; +}) + +(define_insn "stack_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:SI 3 "=&r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_64BIT" + "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:SI 3 "=r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:DI 3 "=r"))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"; + else + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}"; + } + [(set_attr "type" "multi")]) + +; APPLE LOCAL begin 3399553 + +; Expand the builtin FLT_ROUNDS by reading the x87 FPSR rounding bits. 
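+; For reference, C99 defines the FLT_ROUNDS values as 0 (toward zero),
+; 1 (to nearest), 2 (toward +infinity) and 3 (toward -infinity), while the
+; x87 RC field encodes those same modes as 3, 0, 2 and 1 respectively, so
+; only a two-bit remapping of the rounding-control bits is needed.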
+ +(define_expand "flt_rounds" + [(set (match_operand 0 "nonimmediate_operand" "") + (unspec:SI [(reg:CCFP FPSR_REG)] UNSPEC_FLT_ROUNDS))] + "" + " + { + ix86_expand_flt_rounds (operands[0]); + DONE; + } + " +) +; APPLE LOCAL end 3399553 +; APPLE LOCAL begin mainline +(include "mmx.md") +(include "sse.md") +; APPLE LOCAL end mainline +(include "sync.md") + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_mode_macro CRC32MODE [QI HI SI]) +(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")]) +(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")]) + +(define_insn "sse4_2_crc32<mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")] + UNSPEC_CRC32))] + "TARGET_SSE4_2" + "crc32<crc32modesuffix>\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 && TARGET_64BIT" + "crc32q\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 5612787 mainline sse4 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.opt b/gcc-4.2.1-5666.3/gcc/config/i386/i386.opt new file mode 100644 index 000000000..578ea36e7 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.opt @@ -0,0 +1,262 @@ +; Options for the IA-32 and AMD64 ports of the compiler. + +; Copyright (C) 2005 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 2, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING. If not, write to the Free +; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +; 02110-1301, USA. + +m128bit-long-double +Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) +sizeof(long double) is 16 + +m32 +Target RejectNegative Negative(m64) Report InverseMask(64BIT) +Generate 32bit i386 code + +m386 +Target RejectNegative Undocumented +;; Deprecated + +m3dnow +Target Report Mask(3DNOW) +Support 3DNow! 
built-in functions + +m486 +Target RejectNegative Undocumented +;; Deprecated + +m64 +Target RejectNegative Negative(m32) Report Mask(64BIT) +Generate 64bit x86-64 code + +m80387 +Target Report Mask(80387) +Use hardware fp + +m96bit-long-double +Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) +sizeof(long double) is 12 + +maccumulate-outgoing-args +Target Report Mask(ACCUMULATE_OUTGOING_ARGS) +Reserve space for outgoing arguments in the function prologue + +malign-double +Target Report Mask(ALIGN_DOUBLE) +Align some doubles on dword boundary + +malign-functions= +Target RejectNegative Joined Var(ix86_align_funcs_string) +Function starts are aligned to this power of 2 + +malign-jumps= +Target RejectNegative Joined Var(ix86_align_jumps_string) +Jump targets are aligned to this power of 2 + +malign-loops= +Target RejectNegative Joined Var(ix86_align_loops_string) +Loop code aligned to this power of 2 + +malign-stringops +Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) +Align destination of the string operations + +march= +Target RejectNegative Joined Var(ix86_arch_string) +Generate code for given CPU + +masm= +Target RejectNegative Joined Var(ix86_asm_string) +Use given assembler dialect + +mbranch-cost= +Target RejectNegative Joined Var(ix86_branch_cost_string) +Branches are this expensive (1-5, arbitrary units) + +mlarge-data-threshold= +Target RejectNegative Joined Var(ix86_section_threshold_string) +Data greater than given threshold will go into .ldata section in x86-64 medium model + +mcmodel= +Target RejectNegative Joined Var(ix86_cmodel_string) +Use given x86-64 code model + +mdebug-addr +Target RejectNegative Var(TARGET_DEBUG_ADDR) Undocumented + +mdebug-arg +Target RejectNegative Var(TARGET_DEBUG_ARG) Undocumented + +mfancy-math-387 +Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) +Generate sin, cos, sqrt for FPU + +mfp-ret-in-387 +Target Report Mask(FLOAT_RETURNS) +Return values of functions in FPU registers + +mfpmath= +Target RejectNegative Joined Var(ix86_fpmath_string) +Generate floating point mathematics using given instruction set + +mhard-float +Target RejectNegative Mask(80387) MaskExists +Use hardware fp + +mieee-fp +Target Report Mask(IEEE_FP) +Use IEEE math for fp comparisons + +minline-all-stringops +Target Report Mask(INLINE_ALL_STRINGOPS) +Inline all known string operations + +mintel-syntax +Target Undocumented +;; Deprecated + +mmmx +Target Report Mask(MMX) +Support MMX built-in functions + +mms-bitfields +Target Report Mask(MS_BITFIELD_LAYOUT) +Use native (MS) bitfield layout + +mno-align-stringops +Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented + +mno-fancy-math-387 +Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented + +mno-push-args +Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented + +mno-red-zone +Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented + +momit-leaf-frame-pointer +Target Report Mask(OMIT_LEAF_FRAME_POINTER) +Omit the frame pointer in leaf functions + +mpentium +Target RejectNegative Undocumented +;; Deprecated + +mpentiumpro +Target RejectNegative Undocumented +;; Deprecated + +mpreferred-stack-boundary= +Target RejectNegative Joined Var(ix86_preferred_stack_boundary_string) +Attempt to keep stack aligned to this power of 2 + +mpush-args +Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) +Use push instructions to save outgoing arguments + +mred-zone +Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) +Use 
red-zone in the x86-64 code + +mregparm= +Target RejectNegative Joined Var(ix86_regparm_string) +Number of registers used to pass integer arguments + +mrtd +Target Report Mask(RTD) +Alternate calling convention + +msoft-float +Target InverseMask(80387) +Do not use hardware fp + +msse +Target Report Mask(SSE) +Support MMX and SSE built-in functions and code generation + +msse2 +Target Report Mask(SSE2) +Support MMX, SSE and SSE2 built-in functions and code generation + +msse3 +Target Report Mask(SSE3) +Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation + +; APPLE LOCAL begin mainline +mssse3 +Target Report Mask(SSSE3) +Support SSSE3 built-in functions and code generation +; APPLE LOCAL end mainline + +msseregparm +Target RejectNegative Mask(SSEREGPARM) +Use SSE register passing conventions for SF and DF mode + +mstackrealign +Target Report Var(ix86_force_align_arg_pointer) +Realign stack in prologue + +msvr3-shlib +Target Report Mask(SVR3_SHLIB) +Uninitialized locals in .bss + +mstack-arg-probe +Target Report Mask(STACK_PROBE) +Enable stack probing + +mtls-dialect= +Target RejectNegative Joined Var(ix86_tls_dialect_string) +Use given thread-local storage dialect + +mtls-direct-seg-refs +Target Report Mask(TLS_DIRECT_SEG_REFS) +Use direct references against %gs when accessing tls data + +mtune= +Target RejectNegative Joined Var(ix86_tune_string) +Schedule code for given CPU + +;; Support Athlon 3Dnow builtins +Mask(3DNOW_A) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; Var(ix86_isa_flags) +msse4.1 +Target Report Mask(SSE4_1) VarExists +Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation + +;; Var(ix86_isa_flags) +msse4.2 +Target Report Mask(SSE4_2) VarExists +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation + +;; Var(ix86_isa_flags) +msse4 +Target RejectNegative Report Mask(SSE4_2) MaskExists VarExists +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation + +;; Var(ix86_isa_flags) +mno-sse4 +Target RejectNegative Report InverseMask(SSE4_1) MaskExists VarExists +Do not support SSE4.1 and SSE4.2 built-in functions and code generation + +;; Var(ix86_isa_flags) +msse4a +Target Report Mask(SSE4A) VarExists +Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation +;; APPLE LOCAL end 5612787 mainline sse4 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/k6.md b/gcc-4.2.1-5666.3/gcc/config/i386/k6.md new file mode 100644 index 000000000..e0006aebb --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/k6.md @@ -0,0 +1,268 @@ +;; AMD K6/K6-2 Scheduling +;; Copyright (C) 2002, 2004 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. +;; +;; The K6 architecture is quite similar to PPro. 
An important difference is
+;; that there are only two decoders, and they seem to be much slower than
+;; any of the execution units.  So we have to pay much more attention to
+;; proper scheduling for the decoders.
+;; FIXME: We don't do that right now.  A good start would be to sort the
+;; instructions based on length.
+;;
+;; This description is based on data from the following documents:
+;;
+;;    "AMD-K6 Processor Data Sheet (Preliminary information)"
+;;    Advanced Micro Devices, Inc., 1998.
+;;
+;;    "AMD-K6 Processor Code Optimization Application Note"
+;;    Advanced Micro Devices, Inc., 2000.
+;;
+;; CPU execution units of the K6:
+;;
+;; store	describes the Store unit.  This unit is not modelled
+;;		completely and it is only used to model the lea operation.
+;;		Otherwise it lies outside of any critical path.
+;; load		describes the Load unit
+;; alux		describes the Integer X unit
+;; mm		describes the Multimedia unit, which shares a pipe
+;;		with the Integer X unit.  This unit is used for MMX,
+;;		which is not implemented for K6.
+;; aluy		describes the Integer Y unit
+;; fpu		describes the FPU unit
+;; branch	describes the Branch unit
+;;
+;; The fp unit is not pipelined, and it can only do one operation per two
+;; cycles, including fxch.
+;;
+;; Generally this is a very poor description, but at least no worse than
+;; the old description, and a lot easier to extend to something more
+;; reasonable if anyone still cares enough about this architecture in 2004.
+;;
+;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
+
+(define_automaton "k6_decoder,k6_load_unit,k6_store_unit,k6_integer_units,k6_fpu_unit,k6_branch_unit")
+
+;; The K6 instruction decoding begins before the on-chip instruction cache is
+;; filled.  Depending on the length of the instruction, two simple instructions
+;; can be decoded in two parallel short decoders, or one complex instruction can
+;; be decoded in either the long or the vector decoder.  For all practical
+;; purposes, the long and vector decoder can be modelled as one decoder.
+(define_cpu_unit "k6_decode_short0" "k6_decoder")
+(define_cpu_unit "k6_decode_short1" "k6_decoder")
+(define_cpu_unit "k6_decode_long" "k6_decoder")
+(exclusion_set "k6_decode_long" "k6_decode_short0,k6_decode_short1")
+(define_reservation "k6_decode_short" "k6_decode_short0|k6_decode_short1")
+(define_reservation "k6_decode_vector" "k6_decode_long")
+
+(define_cpu_unit "k6_store" "k6_store_unit")
+(define_cpu_unit "k6_load" "k6_load_unit")
+(define_cpu_unit "k6_alux,k6_aluy" "k6_integer_units")
+(define_cpu_unit "k6_fpu" "k6_fpu_unit")
+(define_cpu_unit "k6_branch" "k6_branch_unit")
+
+;; Shift instructions and certain arithmetic are issued only on Integer X.
+(define_insn_reservation "k6_alux_only" 1
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+	    (eq_attr "memory" "none")))
+  "k6_decode_short,k6_alux")
+
+(define_insn_reservation "k6_alux_only_load" 3
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+	    (eq_attr "memory" "load")))
+  "k6_decode_short,k6_load,k6_alux")
+
+(define_insn_reservation "k6_alux_only_store" 3
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+	    (eq_attr "memory" "store,both,unknown")))
+  "k6_decode_long,k6_load,k6_alux,k6_store")
+
+;; Integer divide and multiply can only be issued on Integer X, too.
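+;; As an illustration of how to read the reservations below: a register-only
+;; imul takes the long (vector) decoder and then occupies the Integer X unit
+;; for three consecutive cycles (k6_alux*3), with a result latency of two
+;; cycles.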
+(define_insn_reservation "k6_alu_imul" 2
+  (and (eq_attr "cpu" "k6")
+       (eq_attr "type" "imul"))
+  "k6_decode_vector,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_load" 4
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imul")
+	    (eq_attr "memory" "load")))
+  "k6_decode_vector,k6_load,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_store" 4
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imul")
+	    (eq_attr "memory" "store,both,unknown")))
+  "k6_decode_vector,k6_load,k6_alux*3,k6_store")
+
+;; ??? Guessed latencies based on the old pipeline description.
+(define_insn_reservation "k6_alu_idiv" 17
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "idiv")
+	    (eq_attr "memory" "none")))
+  "k6_decode_vector,k6_alux*17")
+
+(define_insn_reservation "k6_alu_idiv_mem" 19
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "idiv")
+	    (eq_attr "memory" "!none")))
+  "k6_decode_vector,k6_load,k6_alux*17")
+
+;; Basic word and doubleword ALU ops can be issued on both Integer units.
+(define_insn_reservation "k6_alu" 1
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+	    (eq_attr "memory" "none")))
+  "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_load" 3
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+	    (eq_attr "memory" "load")))
+  "k6_decode_short,k6_load,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_store" 3
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+	    (eq_attr "memory" "store,both,unknown")))
+  "k6_decode_long,k6_load,k6_alux|k6_aluy,k6_store")
+
+;; A "load immediate" operation does not require execution at all;
+;; it is available immediately after decoding.  Special-case this.
+(define_insn_reservation "k6_alu_imov" 1
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imov")
+	    (and (eq_attr "memory" "none")
+		 (match_operand 1 "nonimmediate_operand"))))
+  "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_imov_imm" 0
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imov")
+	    (and (eq_attr "memory" "none")
+		 (match_operand 1 "immediate_operand"))))
+  "k6_decode_short")
+
+(define_insn_reservation "k6_alu_imov_load" 2
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imov")
+	    (eq_attr "memory" "load")))
+  "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_alu_imov_store" 1
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imov")
+	    (eq_attr "memory" "store")))
+  "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_alu_imov_both" 2
+  (and (eq_attr "cpu" "k6")
+       (and (eq_attr "type" "imov")
+	    (eq_attr "memory" "both,unknown")))
+  "k6_decode_long,k6_load,k6_alux|k6_aluy")
+
+;; The branch unit.
+(define_insn_reservation "k6_branch_call" 1
+  (and (eq_attr "cpu" "k6")
+       (eq_attr "type" "call,callv"))
+  "k6_decode_vector,k6_branch")
+
+(define_insn_reservation "k6_branch_branch" 1
+  (and (eq_attr "cpu" "k6")
+       (eq_attr "type" "ibr"))
+  "k6_decode_short,k6_branch")
+
+;; The load unit has two pipeline stages.  The load latency is
+;; two cycles.
+(define_insn_reservation "k6_load_pop" 3
+  (and (eq_attr "cpu" "k6")
+       (ior (eq_attr "type" "pop")
+	    (eq_attr "memory" "load,both")))
+  "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_load_leave" 5
+  (and (eq_attr "cpu" "k6")
+       (eq_attr "type" "leave"))
+  "k6_decode_long,k6_load,(k6_alux|k6_aluy)*2")
+
+;; ??? From the old pipeline description.  Egad!
+;; ??? 
Apparently we take care of this reservation in adjust_cost. +(define_insn_reservation "k6_load_str" 10 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both"))) + "k6_decode_vector,k6_load*10") + +;; The store unit handles lea and push. It is otherwise unmodelled. +(define_insn_reservation "k6_store_lea" 2 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "lea")) + "k6_decode_short,k6_store,k6_alux|k6_aluy") + +(define_insn_reservation "k6_store_push" 2 + (and (eq_attr "cpu" "k6") + (ior (eq_attr "type" "push") + (eq_attr "memory" "store,both"))) + "k6_decode_short,k6_store") + +(define_insn_reservation "k6_store_str" 10 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "str")) + "k6_store*10") + +;; Most FPU instructions have latency 2 and throughput 2. +(define_insn_reservation "k6_fpu" 2 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "fop,fmov,fcmp,fistp") + (eq_attr "memory" "none"))) + "k6_decode_vector,k6_fpu*2") + +(define_insn_reservation "k6_fpu_load" 6 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "fop,fmov,fcmp,fistp") + (eq_attr "memory" "load,both"))) + "k6_decode_short,k6_load,k6_fpu*2") + +(define_insn_reservation "k6_fpu_store" 6 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "fop,fmov,fcmp,fistp") + (eq_attr "memory" "store"))) + "k6_decode_short,k6_store,k6_fpu*2") + +(define_insn_reservation "k6_fpu_fmul" 2 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "none"))) + "k6_decode_short,k6_fpu*2") + +(define_insn_reservation "k6_fpu_fmul_load" 2 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "load,both"))) + "k6_decode_short,k6_load,k6_fpu*2") + +;; ??? Guessed latencies from the old pipeline description. +(define_insn_reservation "k6_fpu_expensive" 56 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "fdiv,fpspc")) + "k6_decode_short,k6_fpu*56") + diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm b/gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm new file mode 100644 index 000000000..c672024bb --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm @@ -0,0 +1,30 @@ +# APPLE LOCAL file 4099000 +#ifndef __x86_64__ +#define THUNK(REG) \ +.private_extern ___i686.get_pc_thunk.REG ;\ +___i686.get_pc_thunk.REG: ;\ + movl (%esp,1),%REG ;\ + ret ; + +#ifdef L_get_pc_thunk_ax +THUNK(eax) +#endif +#ifdef L_get_pc_thunk_dx +THUNK(edx) +#endif +#ifdef L_get_pc_thunk_cx +THUNK(ecx) +#endif +#ifdef L_get_pc_thunk_bx +THUNK(ebx) +#endif +#ifdef L_get_pc_thunk_si +THUNK(esi) +#endif +#ifdef L_get_pc_thunk_di +THUNK(edi) +#endif +#ifdef L_get_pc_thunk_bp +THUNK(ebp) +#endif +#endif diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h b/gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h new file mode 100644 index 000000000..7fdc6dce5 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h @@ -0,0 +1,220 @@ +/* Copyright (C) 2004 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. 
If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with + MSVC 7.1. */ + +#ifndef _MM3DNOW_H_INCLUDED +#define _MM3DNOW_H_INCLUDED + +#ifdef __3dNOW__ + +#include <mmintrin.h> + +/* Internal data types for implementing the intrinsics. */ +typedef float __v2sf __attribute__ ((__vector_size__ (8))); + +static __inline void +_m_femms (void) +{ + __builtin_ia32_femms(); +} + +static __inline __m64 +_m_pavgusb (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B); +} + +static __inline __m64 +_m_pf2id (__m64 __A) +{ + return (__m64)__builtin_ia32_pf2id ((__v2sf)__A); +} + +static __inline __m64 +_m_pfacc (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfadd (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfcmpeq (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfcmpge (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfcmpgt (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfmax (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfmin (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfmul (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfrcp (__m64 __A) +{ + return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A); +} + +static __inline __m64 +_m_pfrcpit1 (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfrcpit2 (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfrsqrt (__m64 __A) +{ + return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A); +} + +static __inline __m64 +_m_pfrsqit1 (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfsub (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfsubr (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pi2fd (__m64 __A) +{ + return (__m64)__builtin_ia32_pi2fd ((__v2si)__A); +} + +static __inline __m64 +_m_pmulhrw (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B); +} + +static __inline void +_m_prefetch (void *__P) +{ + __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */); +} + +static __inline void +_m_prefetchw (void *__P) +{ + __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); +} + +static __inline __m64 +_m_from_float (float __A) +{ + return 
(__m64)(__v2sf){ __A, 0 }; +} + +static __inline float +_m_to_float (__m64 __A) +{ + union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A }; + return __tmp.a[0]; +} + +#ifdef __3dNOW_A__ + +static __inline __m64 +_m_pf2iw (__m64 __A) +{ + return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A); +} + +static __inline __m64 +_m_pfnacc (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pfpnacc (__m64 __A, __m64 __B) +{ + return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B); +} + +static __inline __m64 +_m_pi2fw (__m64 __A) +{ + return (__m64)__builtin_ia32_pi2fw ((__v2si)__A); +} + +static __inline __m64 +_m_pswapd (__m64 __A) +{ + return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A); +} + +#endif /* __3dNOW_A__ */ +#endif /* __3dNOW__ */ + +#endif /* _MM3DNOW_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h new file mode 100644 index 000000000..64db0589c --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h @@ -0,0 +1,1219 @@ +/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */ +/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef _MMINTRIN_H_INCLUDED +#define _MMINTRIN_H_INCLUDED + +#ifndef __MMX__ +# error "MMX instruction set not enabled" +#else +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +/* APPLE LOCAL 4505813 */ +typedef long long __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); + +/* Internal data types for implementing the intrinsics. */ +typedef int __v2si __attribute__ ((__vector_size__ (8))); +typedef short __v4hi __attribute__ ((__vector_size__ (8))); +typedef char __v8qi __attribute__ ((__vector_size__ (8))); + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#define __always_inline__ __always_inline__, __nodebug__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +/* APPLE LOCAL begin radar 5618945 */ +#undef __STATIC_INLINE +#ifdef __GNUC_STDC_INLINE__ +#define __STATIC_INLINE __inline +#else +#define __STATIC_INLINE static __inline +#endif +/* APPLE LOCAL end radar 5618945 */ + +/* Empty the multimedia state. 
*/ +/* APPLE LOCAL begin radar 4152603 */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_empty (void) +{ + __builtin_ia32_emms (); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_empty (void) +{ + _mm_empty (); +} + +/* Convert I to a __m64 object. The integer is zero-extended to 64 bits. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi32_si64 (int __i) +{ + return (__m64) __builtin_ia32_vec_init_v2si (__i, 0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_from_int (int __i) +{ + return _mm_cvtsi32_si64 (__i); +} + +#ifdef __x86_64__ +/* Convert I to a __m64 object. */ + +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_from_int64 (long long __i) +{ + return (__m64) __i; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64_m64 (long long __i) +{ + return (__m64) __i; +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64x_si64 (long long __i) +{ + return (__m64) __i; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_pi64x (long long __i) +{ + return (__m64) __i; +} +#endif + +/* Convert the lower 32 bits of the __m64 object into an integer. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64_si32 (__m64 __i) +{ + return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_to_int (__m64 __i) +{ + return _mm_cvtsi64_si32 (__i); +} + +#ifdef __x86_64__ +/* Convert the __m64 object to a 64-bit integer. */ + +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_to_int64 (__m64 __i) +{ + return (long long)__i; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtm64_si64 (__m64 __i) +{ + return (long long)__i; +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64_si64x (__m64 __i) +{ + return (long long)__i; +} +#endif + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with signed saturation.
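+   For example (an illustrative call, not from the original header),
+   _mm_packs_pi16 (_mm_set_pi16 (300, -300, 5, -6), _mm_setzero_si64 ())
+   saturates 300 to 127 and -300 to -128 in the low four bytes, while the
+   high four bytes are taken from the second operand, here all zero.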
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_packs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_packsswb (__m64 __m1, __m64 __m2) +{ + return _mm_packs_pi16 (__m1, __m2); +} + +/* Pack the two 32-bit values from M1 into the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with signed saturation. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_packssdw (__m64 __m1, __m64 __m2) +{ + return _mm_packs_pi32 (__m1, __m2); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with unsigned saturation. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_packs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_packuswb (__m64 __m1, __m64 __m2) +{ + return _mm_packs_pu16 (__m1, __m2); +} + +/* Interleave the four 8-bit values from the high half of M1 with the four + 8-bit values from the high half of M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_punpckhbw (__m64 __m1, __m64 __m2) +{ + return _mm_unpackhi_pi8 (__m1, __m2); +} + +/* Interleave the two 16-bit values from the high half of M1 with the two + 16-bit values from the high half of M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_punpckhwd (__m64 __m1, __m64 __m2) +{ + return _mm_unpackhi_pi16 (__m1, __m2); +} + +/* Interleave the 32-bit value from the high half of M1 with the 32-bit + value from the high half of M2.
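+   For example (illustration only), recalling that _mm_set_pi32 places its
+   second argument in the low element, _mm_unpackhi_pi32
+   (_mm_set_pi32 (1, 2), _mm_set_pi32 (3, 4)) produces 1 in the low
+   element (from M1) and 3 in the high element (from M2).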
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_punpckhdq (__m64 __m1, __m64 __m2) +{ + return _mm_unpackhi_pi32 (__m1, __m2); +} + +/* Interleave the four 8-bit values from the low half of M1 with the four + 8-bit values from the low half of M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_punpcklbw (__m64 __m1, __m64 __m2) +{ + return _mm_unpacklo_pi8 (__m1, __m2); +} + +/* Interleave the two 16-bit values from the low half of M1 with the two + 16-bit values from the low half of M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_punpcklwd (__m64 __m1, __m64 __m2) +{ + return _mm_unpacklo_pi16 (__m1, __m2); +} + +/* Interleave the 32-bit value from the low half of M1 with the 32-bit + value from the low half of M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_punpckldq (__m64 __m1, __m64 __m2) +{ + return _mm_unpacklo_pi32 (__m1, __m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddb (__m64 __m1, __m64 __m2) +{ + return _mm_add_pi8 (__m1, __m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddw (__m64 __m1, __m64 __m2) +{ + return _mm_add_pi16 (__m1, __m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2. 
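+   The addition is element-wise and wraps modulo 2^32 rather than
+   saturating; e.g. (illustration only) adding 1 to a lane holding
+   0x7FFFFFFF yields 0x80000000.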
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddd (__m64 __m1, __m64 __m2) +{ + return _mm_add_pi32 (__m1, __m2); +} + +/* Add the 64-bit values in M1 to the 64-bit values in M2. */ +#ifdef __SSE2__ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_si64 (__m64 __m1, __m64 __m2) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_paddq (__m1, __m2); +} +#endif + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed + saturated arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddsb (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pi8 (__m1, __m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed + saturated arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddsw (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pi16 (__m1, __m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned + saturated arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddusb (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pu8 (__m1, __m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned + saturated arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_adds_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_paddusw (__m64 __m1, __m64 __m2) +{ + return _mm_adds_pu16 (__m1, __m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1. 
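+   As with the additions above, these plain forms wrap on overflow; e.g.
+   (illustration only) subtracting 1 from a byte lane holding -128 wraps
+   to 127.  Contrast the saturating _mm_subs_* variants below.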
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubb (__m64 __m1, __m64 __m2) +{ + return _mm_sub_pi8 (__m1, __m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubw (__m64 __m1, __m64 __m2) +{ + return _mm_sub_pi16 (__m1, __m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubd (__m64 __m1, __m64 __m2) +{ + return _mm_sub_pi32 (__m1, __m2); +} + +/* Subtract the 64-bit values in M2 from the 64-bit values in M1. */ +#ifdef __SSE2__ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_si64 (__m64 __m1, __m64 __m2) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psubq (__m1, __m2); +} +#endif + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed + saturating arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubsb (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pi8 (__m1, __m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + signed saturating arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubsw (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pi16 (__m1, __m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using + unsigned saturating arithmetic.
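+   Unsigned saturation clamps at zero; e.g. (illustration only)
+   _mm_subs_pu16 (_mm_set1_pi16 (5), _mm_set1_pi16 (9)) yields zero in
+   every lane instead of wrapping to 0xFFFC.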
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubusb (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pu8 (__m1, __m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + unsigned saturating arithmetic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_subs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psubusw (__m64 __m1, __m64 __m2) +{ + return _mm_subs_pu16 (__m1, __m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_madd_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmaddwd (__m64 __m1, __m64 __m2) +{ + return _mm_madd_pi16 (__m1, __m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmulhw (__m64 __m1, __m64 __m2) +{ + return _mm_mulhi_pi16 (__m1, __m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce + the low 16 bits of the results. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mullo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmullw (__m64 __m1, __m64 __m2) +{ + return _mm_mullo_pi16 (__m1, __m2); +} + +/* Shift four 16-bit values in M left by COUNT. 
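+   COUNT is given either as an __m64 (_mm_sll_pi16) or as an immediate
+   integer (_mm_slli_pi16); counts of 16 or more clear every lane.  For
+   example (illustration only), _mm_slli_pi16 (_mm_set1_pi16 (1), 3)
+   yields 8 in each lane.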
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sll_pi16 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psllw (__m64 __m, __m64 __count) +{ + return _mm_sll_pi16 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_slli_pi16 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psllwi (__m64 __m, int __count) +{ + return _mm_slli_pi16 (__m, __count); +} + +/* Shift two 32-bit values in M left by COUNT. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sll_pi32 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pslld (__m64 __m, __m64 __count) +{ + return _mm_sll_pi32 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_slli_pi32 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pslldi (__m64 __m, int __count) +{ + return _mm_slli_pi32 (__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sll_si64 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psllq (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psllq (__m64 __m, __m64 __count) +{ + return _mm_sll_si64 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_slli_si64 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psllqi (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psllqi (__m64 __m, int __count) +{ + return _mm_slli_si64 (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. 
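+   This is the arithmetic shift, so negative lanes stay negative; e.g.
+   (illustration only) _mm_srai_pi16 (_mm_set1_pi16 (-4), 1) gives -2 in
+   each lane, and a count of 16 or more leaves each lane as 0 or -1
+   according to its sign.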
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sra_pi16 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psraw (__m64 __m, __m64 __count) +{ + return _mm_sra_pi16 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srai_pi16 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrawi (__m64 __m, int __count) +{ + return _mm_srai_pi16 (__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sra_pi32 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrad (__m64 __m, __m64 __count) +{ + return _mm_sra_pi32 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srai_pi32 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psradi (__m64 __m, int __count) +{ + return _mm_srai_pi32 (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srl_pi16 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrlw (__m64 __m, __m64 __count) +{ + return _mm_srl_pi16 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srli_pi16 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrlwi (__m64 __m, int __count) +{ + return _mm_srli_pi16 (__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in zeros. 
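+   The logical shift treats each lane as unsigned; e.g. (illustration
+   only) _mm_srli_pi32 (_mm_set1_pi32 (-2), 1) gives 0x7FFFFFFF in both
+   lanes, where the arithmetic _mm_srai_pi32 above would give -1.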
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srl_pi32 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrld (__m64 __m, __m64 __count) +{ + return _mm_srl_pi32 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srli_pi32 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrldi (__m64 __m, int __count) +{ + return _mm_srli_pi32 (__m, __count); +} + +/* Shift the 64-bit value in M right by COUNT; shift in zeros. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srl_si64 (__m64 __m, __m64 __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrlq (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrlq (__m64 __m, __m64 __count) +{ + return _mm_srl_si64 (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_srli_si64 (__m64 __m, int __count) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + return (__m64) __builtin_ia32_psrlqi (__m, __count); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psrlqi (__m64 __m, int __count) +{ + return _mm_srli_si64 (__m, __count); +} + +/* Bit-wise AND the 64-bit values in M1 and M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_and_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pand (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pand (__m64 __m1, __m64 __m2) +{ + return _mm_and_si64 (__m1, __m2); +} + +/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the + 64-bit value in M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_andnot_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pandn (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pandn (__m64 __m1, __m64 __m2) +{ + return _mm_andnot_si64 (__m1, __m2); +} + +/* Bit-wise inclusive OR the 64-bit values in M1 and M2.
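+   Combined with pand/pandn above this gives a branchless select; e.g.
+   (illustration only) _mm_or_si64 (_mm_and_si64 (__mask, __a),
+   _mm_andnot_si64 (__mask, __b)) takes bits of A where MASK is set and
+   bits of B elsewhere.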
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_or_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_por (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_por (__m64 __m1, __m64 __m2) +{ + return _mm_or_si64 (__m1, __m2); +} + +/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_xor_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pxor (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pxor (__m64 __m1, __m64 __m2) +{ + return _mm_xor_si64 (__m1, __m2); +} + +/* Compare eight 8-bit values. The result of the comparison is 0xFF if the + test is true and zero if false. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pcmpeqb (__m64 __m1, __m64 __m2) +{ + return _mm_cmpeq_pi8 (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pcmpgtb (__m64 __m1, __m64 __m2) +{ + return _mm_cmpgt_pi8 (__m1, __m2); +} + +/* Compare four 16-bit values. The result of the comparison is 0xFFFF if + the test is true and zero if false. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pcmpeqw (__m64 __m1, __m64 __m2) +{ + return _mm_cmpeq_pi16 (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pcmpgtw (__m64 __m1, __m64 __m2) +{ + return _mm_cmpgt_pi16 (__m1, __m2); +} + +/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if + the test is true and zero if false. 
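+   The all-ones result is usable directly as a selection mask; e.g.
+   (illustration only) _mm_cmpeq_pi32 (_mm_set_pi32 (7, 1),
+   _mm_set_pi32 (7, 2)) yields 0xFFFFFFFF in the high lane and 0 in the
+   low lane.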
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pcmpeqd (__m64 __m1, __m64 __m2) +{ + return _mm_cmpeq_pi32 (__m1, __m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pcmpgtd (__m64 __m1, __m64 __m2) +{ + return _mm_cmpgt_pi32 (__m1, __m2); +} + +/* Creates a 64-bit zero. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setzero_si64 (void) +{ + return (__m64)0LL; +} + +/* Creates a vector of two 32-bit values; I0 is least significant. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_pi32 (int __i1, int __i0) +{ + return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1); +} + +/* Creates a vector of four 16-bit values; W0 is least significant. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) +{ + return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3); +} + +/* Creates a vector of eight 8-bit values; B0 is least significant. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, + char __b3, char __b2, char __b1, char __b0) +{ + return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3, + __b4, __b5, __b6, __b7); +} + +/* Similar, but with the arguments in reverse order. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_pi32 (int __i0, int __i1) +{ + return _mm_set_pi32 (__i1, __i0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) +{ + return _mm_set_pi16 (__w3, __w2, __w1, __w0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, + char __b4, char __b5, char __b6, char __b7) +{ + return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + +/* Creates a vector of two 32-bit values, both elements containing I. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_pi32 (int __i) +{ + return _mm_set_pi32 (__i, __i); +} + +/* Creates a vector of four 16-bit values, all elements containing W. 
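+   Note that the splat forms here simply forward to the corresponding
+   _mm_set_* call, e.g. _mm_set1_pi16 (__w) is _mm_set_pi16 (__w, __w,
+   __w, __w); no dedicated broadcast instruction is assumed.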
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_pi16 (short __w) +{ + return _mm_set_pi16 (__w, __w, __w, __w); +} + +/* Creates a vector of eight 8-bit values, all elements containing B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_pi8 (char __b) +{ + return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b); +} +/* APPLE LOCAL end radar 4152603 */ + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#undef __always_inline__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +#endif /* __MMX__ */ +#endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/mmx.md b/gcc-4.2.1-5666.3/gcc/config/i386/mmx.md new file mode 100644 index 000000000..4e55cfdc2 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/mmx.md @@ -0,0 +1,1470 @@ +;; GCC machine description for MMX and 3dNOW! instructions +;; Copyright (C) 2005 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;; The MMX and 3dNOW! patterns are in the same file because they use +;; the same register file, and 3dNOW! adds a number of extensions to +;; the base integer MMX isa. + +;; Note! Except for the basic move instructions, *all* of these +;; patterns are outside the normal optabs namespace. This is because +;; use of these registers requires the insertion of emms or femms +;; instructions to return to normal fpu mode. The compiler doesn't +;; know how to do that itself, which means it's up to the user. Which +;; means that we should never use any of these patterns except at the +;; direction of the user via a builtin. + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +;; 8 byte integral modes handled by MMX (and by extension, SSE) +(define_mode_macro MMXMODEI [V8QI V4HI V2SI V1DI]) + +;; All 8-byte vector modes handled by MMX +(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF V1DI]) + +;; Mix-n-match +(define_mode_macro MMXMODE12 [V8QI V4HI]) +(define_mode_macro MMXMODE24 [V4HI V2SI]) +(define_mode_macro MMXMODE124 [V8QI V4HI V2SI]) +(define_mode_macro MMXMODE248 [V4HI V2SI V1DI]) + +;; Mapping from integer vector mode to mnemonic suffix +(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Move patterns +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; All of these patterns are enabled for MMX as well as 3dNOW. +;; This is essential for maintaining stable calling conventions. 
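+
+;; As an illustrative sketch (editorial example, not in the original
+;; file): user code reaches the non-move patterns below only through the
+;; <mmintrin.h> builtins, e.g.
+;;
+;;   #include <mmintrin.h>
+;;   __m64 f (__m64 a, __m64 b) { return _mm_add_pi16 (a, b); }
+;;
+;; which expands via __builtin_ia32_paddw into mmx_addv4hi3 (paddw),
+;; while the caller remains responsible for issuing emms (_mm_empty)
+;; before any x87 floating-point code.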
+ +(define_expand "mov<mode>" + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") + (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] + "TARGET_MMX" +{ + ix86_expand_vector_move (<MODE>mode, operands); + DONE; +}) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +;; Take {ym->y} into account for register allocation +(define_insn "*mov<mode>_internal_rex64" + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "=rm,r,*y,*y ,m ,*y,Yt,x,x ,m,r,x") +;; APPLE LOCAL end mainline 2007-06-05 5103201 + (match_operand:MMXMODEI 1 "vector_move_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "Cr ,m,C ,*ym,*y,Yt,*y,C,xm,x,x,r"))] +;; APPLE LOCAL end mainline 2007-06-05 5103201 + "TARGET_64BIT && TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov") + (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +(define_insn "*mov<mode>_internal" +;; APPLE LOCAL begin radar 4043818 + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "=*y,y ,m ,*y ,*Yt,*Yt,*Yt ,m ,*x,*x,*x,m ,?r ,?m") +;; APPLE LOCAL end mainline 2007-06-05 5103201 + (match_operand:MMXMODEI 1 "vector_move_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "C ,*ym,*y,*Yt,*y ,C ,*Ytm,*Yt,C ,*x,m ,*x,irm,r"))] +;; APPLE LOCAL end mainline 2007-06-05 5103201 +;; APPLE LOCAL end radar 4043818 + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + # + #" + [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov,*,*") + (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*,*,*,*") + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")]) + +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] + "TARGET_MMX" +{ + ix86_expand_vector_move (V2SFmode, operands); + DONE; +}) + +(define_insn "*movv2sf_internal_rex64" + [(set (match_operand:V2SF 0 "nonimmediate_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "=rm,r,*y ,*y ,m ,*y,Yt,x,x,x,m,r,x") +;; APPLE LOCAL end mainline 2007-06-05 5103201 + (match_operand:V2SF 1 "vector_move_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "Cr ,m ,C ,*ym,*y,Yt,*y,C,x,m,x,x,r"))] +;; APPLE LOCAL end mainline 2007-06-05 5103201 + "TARGET_64BIT && TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + 
[(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov") + (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*") + (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")]) + +(define_insn "*movv2sf_internal" + [(set (match_operand:V2SF 0 "nonimmediate_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "=*y,*y ,m,*y ,*Yt,*x,*x,*x,m ,?r ,?m") +;; APPLE LOCAL end mainline 2007-06-05 5103201 + (match_operand:V2SF 1 "vector_move_operand" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + "C ,*ym,*y,*Yt,*y ,C ,*x,m ,*x,irm,r"))] +;; APPLE LOCAL end mainline 2007-06-05 5103201 + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + # + #" + [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*") + (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*") + (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")]) + +;; %%% This multiword shite has got to go. +(define_split + [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") + (match_operand:MMXMODE 1 "general_operand" ""))] +;; APPLE LOCAL begin 4099020 + "!TARGET_64BIT && reload_completed + && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]) && GET_CODE (operands[0]) != SUBREG) + && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]) && GET_CODE (operands[1]) != SUBREG)" +;; APPLE LOCAL end 4099020 + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_expand "push<mode>1" + [(match_operand:MMXMODE 0 "register_operand" "")] + "TARGET_MMX" +{ + ix86_expand_push (<MODE>mode, operands[0]); + DONE; +}) + +(define_expand "movmisalign<mode>" + [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") + (match_operand:MMXMODE 1 "nonimmediate_operand" ""))] + "TARGET_MMX" +{ + ix86_expand_vector_move (<MODE>mode, operands); + DONE; +}) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "sse_movntv1di" + [(set (match_operand:V1DI 0 "memory_operand" "=m") + (unspec:V1DI [(match_operand:V1DI 1 "register_operand" "y")] + UNSPEC_MOVNT))] + "TARGET_SSE || TARGET_3DNOW_A" + "movntq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_addv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)" + "pfadd\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_subv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y,y") + (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym") + (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))] + "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + pfsub\\t{%2, %0|%0, %2} + pfsubr\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_expand "mmx_subrv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "") + (minus:V2SF (match_operand:V2SF 2 
"nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" "")))] + "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "") + +(define_insn "mmx_mulv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)" + "pfmul\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_smaxv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smax:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (SMAX, V2SFmode, operands)" + "pfmax\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_sminv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smin:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (SMIN, V2SFmode, operands)" + "pfmin\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rcpv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRCP))] + "TARGET_3DNOW" + "pfrcp\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rcpit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT1))] + "TARGET_3DNOW" + "pfrcpit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rcpit2v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT2))] + "TARGET_3DNOW" + "pfrcpit2\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rsqrtv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQRT))] + "TARGET_3DNOW" + "pfrsqrt\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rsqit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQIT1))] + "TARGET_3DNOW" + "pfrsqit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_haddv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (plus:SF + (vec_select:SF + (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF + (match_operand:V2SF 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_3DNOW" + "pfacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_hsubv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (minus:SF + (vec_select:SF + (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + 
(vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) + (minus:SF + (vec_select:SF + (match_operand:V2SF 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_3DNOW_A" + "pfnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_addsubv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_merge:V2SF + (plus:V2SF + (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")) + (minus:V2SF (match_dup 1) (match_dup 2)) + (const_int 1)))] + "TARGET_3DNOW_A" + "pfpnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_gtv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpgt\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_gev2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpge\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_eqv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)" + "pfcmpeq\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_pf2id" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pf2id\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_pf2iw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (sign_extend:V2SI + (ss_truncate:V2HI + (fix:V2SI + (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] + "TARGET_3DNOW_A" + "pf2iw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_pi2fw" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF + (sign_extend:V2SI + (truncate:V2HI + (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))] + "TARGET_3DNOW_A" + "pi2fw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_floatv2si2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pi2fd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_pswapdv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + 
(vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "*vec_dupv2sf" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_duplicate:V2SF + (match_operand:SF 1 "register_operand" "0")))] + "TARGET_MMX" + "punpckldq\t%0, %0" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "*mmx_concatv2sf" + [(set (match_operand:V2SF 0 "register_operand" "=y,y") + (vec_concat:V2SF + (match_operand:SF 1 "nonimmediate_operand" " 0,rm") + (match_operand:SF 2 "vector_move_operand" "ym,C")))] + "TARGET_MMX && !TARGET_SSE" + "@ + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt,mmxmov") + (set_attr "mode" "DI")]) + +(define_expand "vec_setv2sf" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_insn_and_split "*vec_extractv2sf_0" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,y,m,m,frxy") + (vec_select:SF + (match_operand:V2SF 1 "nonimmediate_operand" " x,y,x,y,m") + (parallel [(const_int 0)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (SFmode, REGNO (op1)); + else + op1 = gen_lowpart (SFmode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_insn "*vec_extractv2sf_1" + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,frxy") + (vec_select:SF + (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o") + (parallel [(const_int 1)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + punpckhdq\t%0, %0 + unpckhps\t%0, %0 + #" + [(set_attr "type" "mmxcvt,sselog1,*") + (set_attr "mode" "DI,V4SF,SI")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (vec_select:SF + (match_operand:V2SF 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_MMX && reload_completed" + [(const_int 0)] +{ + operands[1] = adjust_address (operands[1], SFmode, 4); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_extractv2sf" + [(match_operand:SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv2sf" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_add<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (plus:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "padd<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +;; remove mmx_adddi3 +;; APPLE LOCAL end 4656532 use 
V1DImode for _m64 + +(define_insn "mmx_ssadd<mode>3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (ss_plus:MMXMODE12 + (match_operand:MMXMODE12 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "padds<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_usadd<mode>3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (us_plus:MMXMODE12 + (match_operand:MMXMODE12 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddus<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_sub<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (minus:MMXMODEI + (match_operand:MMXMODEI 1 "register_operand" "0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psub<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +;; remove mmx_subdi3 +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +(define_insn "mmx_sssub<mode>3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (ss_minus:MMXMODE12 + (match_operand:MMXMODE12 1 "register_operand" "0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubs<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ussub<mode>3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (us_minus:MMXMODE12 + (match_operand:MMXMODE12 1 "register_operand" "0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubus<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pmaddwd" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (sign_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) (const_int 2)]))) + (sign_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) (const_int 2)])))) + 
(mult:V2SI + (sign_extend:V2SI + (vec_select:V2HI (match_dup 1) + (parallel [(const_int 1) (const_int 3)]))) + (sign_extend:V2SI + (vec_select:V2HI (match_dup 2) + (parallel [(const_int 1) (const_int 3)]))))))] + "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pmulhrwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_vector:V4SI [(const_int 32768) (const_int 32768) + (const_int 32768) (const_int 32768)])) + (const_int 16))))] + "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhrw\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "sse2_umulsidi3" + [(set (match_operand:V1DI 0 "register_operand" "=y") + (mult:V1DI + (zero_extend:V1DI + (vec_select:V1SI + (match_operand:V2SI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0)]))) + (zero_extend:V1DI + (vec_select:V1SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +(define_insn "mmx_umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (UMAX, V8QImode, operands)" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (SMAX, V4HImode, operands)" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (UMIN, V8QImode, operands)" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (SMIN, V4HImode, operands)" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "mmx_ashr<mode>3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (ashiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") + (match_operand:V1DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psra<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ashr<mode>2si" + [(set (match_operand:MMXMODE24 0 
"register_operand" "=y") + (ashiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") + (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" "yi"))))] + "TARGET_MMX" + "psra<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_lshr<mode>3" + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (lshiftrt:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:V1DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrl<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_lshr<mode>2si" + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (lshiftrt:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" "yi"))))] + "TARGET_MMX" + "psrl<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ashl<mode>3" + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (ashift:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (match_operand:V1DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psll<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ashl<mode>2si" + [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + (ashift:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand" "0") + (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" "yi"))))] + "TARGET_MMX" + "psll<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "mmx_eq<mode>3" + [(set (match_operand:MMXMODE124 0 "register_operand" "=y") + (eq:MMXMODE124 + (match_operand:MMXMODE124 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODE124 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" + "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) + +(define_insn "mmx_gt<mode>3" + [(set (match_operand:MMXMODE124 0 "register_operand" "=y") + (gt:MMXMODE124 + (match_operand:MMXMODE124 1 "register_operand" "0") + (match_operand:MMXMODE124 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral logical operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_and<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (and:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (AND, <MODE>mode, operands)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_nand<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (and:MMXMODEI + (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0")) + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + 
"pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ior<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (ior:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (IOR, <MODE>mode, operands)" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_xor<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (xor:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (XOR, <MODE>mode, operands)" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") + (set_attr "memory" "none")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_packsswb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (ss_truncate:V4QI + (match_operand:V4HI 1 "register_operand" "0")) + (ss_truncate:V4QI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))] + "TARGET_MMX" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_packssdw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (ss_truncate:V2HI + (match_operand:V2SI 1 "register_operand" "0")) + (ss_truncate:V2HI + (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))] + "TARGET_MMX" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_packuswb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI + (match_operand:V4HI 1 "register_operand" "0")) + (us_truncate:V4QI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))] + "TARGET_MMX" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckhbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_MMX" + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpcklbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_MMX" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckhwd" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_MMX" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpcklwd" + [(set (match_operand:V4HI 0 "register_operand" "=y") + 
(vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_MMX" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckhdq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_MMX" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckldq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_MMX" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_expand "mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "") + (vec_merge:V4HI + (vec_duplicate:V4HI + (match_operand:SI 2 "nonimmediate_operand" "")) + (match_operand:V4HI 1 "register_operand" "") + (match_operand:SI 3 "const_0_to_3_operand" "")))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[2] = gen_lowpart (HImode, operands[2]); + operands[3] = GEN_INT (1 << INTVAL (operands[3])); +}) + +(define_insn "*mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_duplicate:V4HI + (match_operand:HI 2 "nonimmediate_operand" "rm")) + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +} + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "y") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_expand "mmx_pshufw" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE || TARGET_3DNOW_A" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "mmx_pshufw_1" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (match_operand:V4HI 1 "nonimmediate_operand" "ym") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "")])))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "pshufw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pswapdv2si2" + [(set (match_operand:V2SI 0 "register_operand" "=y") 
+ (vec_select:V2SI + (match_operand:V2SI 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "*vec_dupv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_duplicate:V4HI + (truncate:HI + (match_operand:SI 1 "register_operand" "0"))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pshufw\t{$0, %0, %0|%0, %0, 0}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "*vec_dupv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_duplicate:V2SI + (match_operand:SI 1 "register_operand" "0")))] + "TARGET_MMX" + "punpckldq\t%0, %0" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "*mmx_concatv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y,y") + (vec_concat:V2SI + (match_operand:SI 1 "nonimmediate_operand" " 0,rm") + (match_operand:SI 2 "vector_move_operand" "ym,C")))] + "TARGET_MMX && !TARGET_SSE" + "@ + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt,mmxmov") + (set_attr "mode" "DI")]) + +(define_expand "vec_setv2si" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +;; APPLE LOCAL begin 4684674 permit mmx-to-int reg +(define_insn_and_split "*vec_extractv2si_0" + [(set (match_operand:SI 0 "nonimmediate_operand" "=x,y,m,mr,frxy") + (vec_select:SI + (match_operand:V2SI 1 "nonimmediate_operand" " x,y,x,y,m") + (parallel [(const_int 0)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (SImode, REGNO (op1)); + else + op1 = gen_lowpart (SImode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) +;; APPLE LOCAL end 4684674 + +(define_insn "*vec_extractv2si_1" +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Yt,Yt,x,frxy") +;; APPLE LOCAL end mainline 2007-06-05 5103201 + (vec_select:SI +;; APPLE LOCAL begin mainline 2007-06-05 5103201 + (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Yt,0,o") +;; APPLE LOCAL end mainline 2007-06-05 5103201 + (parallel [(const_int 1)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + punpckhdq\t%0, %0 + punpckhdq\t%0, %0 + pshufd\t{$85, %1, %0|%0, %1, 85} + unpckhps\t%0, %0 + #" + [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,*") + (set_attr "mode" "DI,TI,TI,V4SF,SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (vec_select:SI + (match_operand:V2SI 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_MMX && reload_completed" + [(const_int 0)] +{ + operands[1] = adjust_address (operands[1], SImode, 4); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_extractv2si" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2SI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv2si" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_expand 
"vec_setv4hi" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv4hi" + [(match_operand:HI 0 "register_operand" "") + (match_operand:V4HI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv4hi" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_setv8qi" + [(match_operand:V8QI 0 "register_operand" "") + (match_operand:QI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv8qi" + [(match_operand:QI 0 "register_operand" "") + (match_operand:V8QI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv8qi" + [(match_operand:V8QI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Miscellaneous +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_uavgv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI + (zero_extend:V8HI + (match_operand:V8QI 1 "nonimmediate_operand" "%0")) + (zero_extend:V8HI + (match_operand:V8QI 2 "nonimmediate_operand" "ym"))) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "(TARGET_SSE || TARGET_3DNOW) + && ix86_binary_operator_ok (PLUS, V8QImode, operands)" +{ + /* These two instructions have the same operation, but their encoding + is different. Prefer the one that is de facto standard. 
*/ + if (TARGET_SSE || TARGET_3DNOW_A) + return "pavgb\t{%2, %0|%0, %2}"; + else + return "pavgusb\\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (plus:V4SI + (zero_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_vector:V4SI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (PLUS, V4HImode, operands)" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "mmx_psadbw" + [(set (match_operand:V1DI 0 "register_operand" "=y") + (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSADBW))] + "TARGET_SSE || TARGET_3DNOW_A" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +(define_insn "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] + UNSPEC_MOVMSK))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_expand "mmx_maskmovq" + [(set (match_operand:V8QI 0 "memory_operand" "") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y") + (match_dup 0)] + UNSPEC_MASKMOV))] + "TARGET_SSE || TARGET_3DNOW_A" + "") + +(define_insn "*mmx_maskmovq" + [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y") + (mem:V8QI (match_dup 0))] + UNSPEC_MASKMOV))] + "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\t{%2, %1|%1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "*mmx_maskmovq_rex" + [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y") + (mem:V8QI (match_dup 0))] + UNSPEC_MASKMOV))] + "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\t{%2, %1|%1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_emms" + [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_MMX" + "emms" + [(set_attr "type" "mmx") + (set_attr "memory" "unknown")]) + +(define_insn "mmx_femms" + [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber 
(reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_3DNOW" + "femms" + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h new file mode 100644 index 000000000..5c0db207b --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h @@ -0,0 +1,41 @@ +/* APPLE LOCAL file 5612787 mainline sse4 */ +/* Copyright (C) 2007 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 10.0. */ + +#ifndef _NMMINTRIN_H_INCLUDED +#define _NMMINTRIN_H_INCLUDED + +#ifndef __SSE4_2__ +# error "SSE4.2 instruction set not enabled" +#else +/* We just include SSE4.1 header file. */ +#include <smmintrin.h> +#endif /* __SSE4_2__ */ + +#endif /* _NMMINTRIN_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/pentium.md b/gcc-4.2.1-5666.3/gcc/config/i386/pentium.md new file mode 100644 index 000000000..1f994dd60 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/pentium.md @@ -0,0 +1,312 @@ +;; Pentium Scheduling +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. */ +;; +;; The Pentium is an in-order core with two integer pipelines. + +;; True for insns that behave like prefixed insns on the Pentium. +(define_attr "pent_prefix" "false,true" + (if_then_else (ior (eq_attr "prefix_0f" "1") + (ior (eq_attr "prefix_data16" "1") + (eq_attr "prefix_rep" "1"))) + (const_string "true") + (const_string "false"))) + +;; Categorize how an instruction slots. 
+ +;; The non-MMX Pentium slots an instruction with prefixes on the U pipe +;; only, while the MMX Pentium can slot it on either U or V. We model the +;; non-MMX Pentium rules, because doing so results in noticeably better +;; code on the non-MMX Pentium and doesn't hurt much on MMX. (Prefixed +;; instructions are not very common, so the scheduler usually has a +;; non-prefixed insn to pair; a toy C model of these pairing rules +;; appears further below.) + +(define_attr "pent_pair" "uv,pu,pv,np" + (cond [(eq_attr "imm_disp" "true") + (const_string "np") + (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec") + (and (eq_attr "type" "pop,push") + (eq_attr "memory" "!both"))) + (if_then_else (eq_attr "pent_prefix" "true") + (const_string "pu") + (const_string "uv")) + (eq_attr "type" "ibr") + (const_string "pv") + (and (eq_attr "type" "ishift") + (match_operand 2 "const_int_operand" "")) + (const_string "pu") + (and (eq_attr "type" "rotate") + (match_operand 2 "const1_operand" "")) + (const_string "pu") + (and (eq_attr "type" "ishift1") + (match_operand 1 "const_int_operand" "")) + (const_string "pu") + (and (eq_attr "type" "rotate1") + (match_operand 1 "const1_operand" "")) + (const_string "pu") + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_string "pv") + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_string "pv") + ] + (const_string "np"))) + +(define_automaton "pentium,pentium_fpu") + +;; The Pentium has U and V pipes. Instructions are always issued to +;; both pipes together, much like on a VLIW machine. +;; +;; predecode +;; / \ +;; decodeu decodev +;; / | | +;; fpu executeu executev +;; | | | +;; fpu retire retire +;; | +;; fpu +;; We add dummy "port" pipes, allocated only on the first cycle of an +;; instruction, to specify this behavior. + +(define_cpu_unit "pentium-portu,pentium-portv" "pentium") +(define_cpu_unit "pentium-u,pentium-v" "pentium") +(absence_set "pentium-portu" "pentium-u,pentium-v") +(presence_set "pentium-portv" "pentium-portu") + +;; Floating point instructions can overlap with newly issued integer +;; instructions. We model only the first cycle of the FP pipeline, as +;; it is fully pipelined. +(define_cpu_unit "pentium-fp" "pentium_fpu") + +;; There is a non-pipelined multiplier unit used for complex operations. +(define_cpu_unit "pentium-fmul" "pentium_fpu") + +;; The Pentium preserves memory ordering, so when a load-execute-store +;; instruction is executed together with another instruction that loads +;; data, the execution of the other instruction is delayed until the +;; very last cycle of the first instruction, when the data are bypassed. +;; We model this by allocating the "memory" unit while a store is +;; pending and by using conflicting load units together; the unit +;; definitions follow the pairing sketch below.
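As a toy illustration of the pairing rules above (not GCC code; the struct layout and helper name are invented for this sketch, and the push/pop and constant-address-call cases are folded into the simple and branch groups for brevity):

#include <stdbool.h>
#include <stdio.h>

/* Invented one-struct summary of the attributes tested by pent_pair.  */
struct pent_insn {
  bool imm_disp;        /* uses both an immediate and a displacement */
  bool prefixed;        /* pent_prefix: 0f/data16/rep prefixed */
  bool simple;          /* alu1,alu,imov,icmp,test,lea,incdec, simple push/pop */
  bool branch;          /* type ibr, or a call to a constant address */
  bool shift_by_const;  /* shift by a constant count, or rotate by 1 */
};

/* "uv" pairs in either pipe, "pu"/"pv" pair only in the named pipe,
   "np" does not pair; cases are checked in the same order as the cond.  */
static const char *pent_pair_class (const struct pent_insn *i)
{
  if (i->imm_disp)
    return "np";
  if (i->simple)
    return i->prefixed ? "pu" : "uv";
  if (i->branch)
    return "pv";
  if (i->shift_by_const)
    return "pu";
  return "np";
}

int main (void)
{
  struct pent_insn add_rr = { false, false, true, false, false };
  struct pent_insn jmp = { false, false, false, true, false };
  printf ("%s %s\n", pent_pair_class (&add_rr),
          pent_pair_class (&jmp));   /* prints: uv pv */
  return 0;
}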
+ +(define_cpu_unit "pentium-memory" "pentium") +(define_cpu_unit "pentium-load0" "pentium") +(define_cpu_unit "pentium-load1" "pentium") +(absence_set "pentium-load0,pentium-load1" "pentium-memory") + +(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)") +(define_reservation "pentium-np" "(pentium-u + pentium-v)") +(define_reservation "pentium-uv" "(pentium-u | pentium-v)") +(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)") +(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)") +(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)") +(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)") +(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)") +(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)") +(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv) + | (pentium-firstv,pentium-v, + (pentium-load+pentium-firstv))") +(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu + + pentium-memory)") +(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv + + pentium-memory)") +(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv + + pentium-memory) + | (pentium-firstv,pentium-v, + (pentium-load+pentium-firstv))") + +;; A few common long-latency instructions +(define_insn_reservation "pent_mul" 11 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "imul")) + "pentium-np*11") + +(define_insn_reservation "pent_str" 12 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "str")) + "pentium-np*12") + +;; Integer division and some other long-latency instructions block all +;; units, including the FP pipe. There is no value in modeling the +;; latency of these instructions, and not modeling the latency +;; decreases the size of the DFA. +(define_insn_reservation "pent_block" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "idiv")) + "pentium-np+pentium-fp") + +(define_insn_reservation "pent_cld" 2 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "cld")) + "pentium-np*2") + +;; Moves usually have a one cycle penalty, but there are exceptions. +(define_insn_reservation "pent_fmov" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none,load"))) + "(pentium-fp+pentium-np)") + +(define_insn_reservation "pent_fpmovxf" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF")))) + "(pentium-fp+pentium-np)*3") + +(define_insn_reservation "pent_fpstore" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (ior (match_operand 1 "immediate_operand" "") + (eq_attr "memory" "store")))) + "(pentium-fp+pentium-np)*2") + +(define_insn_reservation "pent_imov" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "imov")) + "pentium-firstuv") + +;; Push and pop instructions have 1 cycle latency, and a special +;; hardware bypass allows them to be paired with other push, pop, +;; and call instructions. +(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call") +(define_insn_reservation "pent_push" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "push") + (eq_attr "memory" "store"))) + "pentium-firstuv") + +(define_insn_reservation "pent_pop" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "pop,leave")) + "pentium-firstuv") + +;; Call and branch instructions can execute in either pipe, but +;; they are only pairable when in the v pipe; their reservations +;; follow the bypass sketch below.
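The define_bypass above says that a push or pop feeding another push, pop, or call sees a result latency of 0 instead of the declared 1. A minimal C sketch of that rule, with invented names:

#include <stdio.h>
#include <string.h>

/* Zero-latency forwarding between stack ops, per the define_bypass
   from "pent_push,pent_pop" to "pent_push,pent_pop,pent_call" above.  */
static int pent_result_latency (const char *producer, const char *consumer)
{
  int stack_prod = !strcmp (producer, "push") || !strcmp (producer, "pop");
  int stack_cons = !strcmp (consumer, "push") || !strcmp (consumer, "pop")
                   || !strcmp (consumer, "call");
  return (stack_prod && stack_cons) ? 0 : 1;  /* declared latency is 1 */
}

int main (void)
{
  printf ("push->push: %d cycles\n", pent_result_latency ("push", "push"));
  printf ("push->add:  %d cycles\n", pent_result_latency ("push", "add"));
  return 0;
}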
+(define_insn_reservation "pent_call" 10 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "call,callv")) + "pentium-firstv,pentium-v*9") + +(define_insn_reservation "pent_branch" 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "ibr")) + "pentium-firstv") + +;; Floating point instruction dispatch in U pipe, but continue +;; in FP pipeline allowing other instructions to be executed. +(define_insn_reservation "pent_fp" 3 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fop,fistp")) + "(pentium-firstu+pentium-fp),nothing,nothing") + +;; First two cycles of fmul are not pipelined. +(define_insn_reservation "pent_fmul" 3 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fmul")) + "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing") + +;; Long latency FP instructions overlap with integer instructions, +;; but only last 2 cycles with FP ones. +(define_insn_reservation "pent_fdiv" 39 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fdiv")) + "(pentium-np+pentium-fp+pentium-fmul), + (pentium-fp+pentium-fmul)*36,pentium-fmul*2") + +(define_insn_reservation "pent_fpspc" 70 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fpspc")) + "(pentium-np+pentium-fp+pentium-fmul), + (pentium-fp+pentium-fmul)*67,pentium-fmul*2") + +;; Integer instructions. Load/execute/store takes 3 cycles, +;; load/execute 2 cycles and execute only one cycle. +(define_insn_reservation "pent_uv_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "uv") + (eq_attr "memory" "both"))) + "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv") + +(define_insn_reservation "pent_u_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "both"))) + "pentium-firstuboth,pentium-u+pentium-memory,pentium-u") + +(define_insn_reservation "pent_v_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pv") + (eq_attr "memory" "both"))) + "pentium-firstvboth,pentium-v+pentium-memory,pentium-v") + +(define_insn_reservation "pent_np_both" 3 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "both"))) + "pentium-np,pentium-np,pentium-np") + +(define_insn_reservation "pent_uv_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "uv") + (eq_attr "memory" "load"))) + "pentium-firstuvload,pentium-uv") + +(define_insn_reservation "pent_u_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "load"))) + "pentium-firstuload,pentium-u") + +(define_insn_reservation "pent_v_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pv") + (eq_attr "memory" "load"))) + "pentium-firstvload,pentium-v") + +(define_insn_reservation "pent_np_load" 2 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "load"))) + "pentium-np,pentium-np") + +(define_insn_reservation "pent_uv" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "uv") + (eq_attr "memory" "none"))) + "pentium-firstuv") + +(define_insn_reservation "pent_u" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "none"))) + "pentium-firstu") + +(define_insn_reservation "pent_v" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "pv") + (eq_attr "memory" "none"))) + "pentium-firstv") + +(define_insn_reservation "pent_np" 1 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "none"))) + "pentium-np") + diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h 
b/gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h new file mode 100644 index 000000000..764094186 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h @@ -0,0 +1,172 @@ +/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */ +/* Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef _PMMINTRIN_H_INCLUDED +#define _PMMINTRIN_H_INCLUDED + +#ifdef __SSE3__ +#include <xmmintrin.h> +#include <emmintrin.h> + +/* Additional bits in the MXCSR. */ +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#define _MM_DENORMALS_ZERO_ON 0x0040 +#define _MM_DENORMALS_ZERO_OFF 0x0000 + +#define _MM_SET_DENORMALS_ZERO_MODE(mode) \ + _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode)) +#define _MM_GET_DENORMALS_ZERO_MODE() \ + (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#define __always_inline__ __always_inline__, __nodebug__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +/* APPLE LOCAL begin radar 5618945 */ +#undef __STATIC_INLINE +#ifdef __GNUC_STDC_INLINE__ +#define __STATIC_INLINE __inline +#else +#define __STATIC_INLINE static __inline +#endif +/* APPLE LOCAL end radar 5618945 */ + +/* APPLE LOCAL begin radar 4152603 */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_addsub_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadd_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsub_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movehdup_ps (__m128 __X) +{ + return (__m128) __builtin_ia32_movshdup ((__v4sf)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_moveldup_ps (__m128 __X) 
+{ + return (__m128) __builtin_ia32_movsldup ((__v4sf)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_addsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadd_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loaddup_pd (double const *__P) +{ + return _mm_load1_pd (__P); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movedup_pd (__m128d __X) +{ + return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_lddqu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_lddqu ((char const *)__P); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_monitor (void const * __P, unsigned int __E, unsigned int __H) +{ + __builtin_ia32_monitor (__P, __E, __H); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mwait (unsigned int __E, unsigned int __H) +{ + __builtin_ia32_mwait (__E, __H); +} +/* APPLE LOCAL end radar 4152603 */ +/* APPLE LOCAL begin nodebug inline 4152603 */ +#undef __always_inline__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +#endif /* __SSE3__ */ + +#endif /* _PMMINTRIN_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/ppro.md b/gcc-4.2.1-5666.3/gcc/config/i386/ppro.md new file mode 100644 index 000000000..3e31eb336 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/ppro.md @@ -0,0 +1,763 @@ +;; Scheduling for the Intel P6 family of processors +;; Copyright (C) 2004, 2005 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. */ + +;; The P6 family includes the Pentium Pro, Pentium II, Pentium III, Celeron +;; and Xeon lines of CPUs. 
The DFA scheduler description in this file is +;; based on information that can be found in the following three documents: +;; +;; "P6 Family of Processors Hardware Developer's Manual", +;; Intel, September 1999. +;; +;; "Intel Architecture Optimization Manual", +;; Intel, 1999 (Order Number: 245127-001). +;; +;; "How to optimize for the Pentium family of microprocessors", +;; by Agner Fog, PhD. +;; +;; The P6 pipeline has three major components: +;; 1) the FETCH/DECODE unit, an in-order issue front-end +;; 2) the DISPATCH/EXECUTE unit, which is the out-of-order core +;; 3) the RETIRE unit, an in-order retirement unit +;; +;; So, the P6 CPUs have out-of-order cores, but the instruction decoder and +;; retirement unit are naturally in-order. +;; +;; BUS INTERFACE UNIT +;; / \ +;; L1 ICACHE L1 DCACHE +;; / | \ | \ +;; DECODER0 DECODER1 DECODER2 DISP/EXEC RETIRE +;; \ | / | | +;; INSTRUCTION POOL __________|_______/ +;; (inc. reorder buffer) +;; +;; Since the P6 CPUs execute instructions out-of-order, the most important +;; consideration in performance tuning is making sure enough micro-ops are +;; ready for execution in the out-of-order core, while not stalling the +;; decoder. +;; +;; TODO: +;; - Find a less crude way to model complex instructions, in +;; particular how many cycles they take to be decoded. +;; - Include decoder latencies in the total reservation latencies. +;; This isn't necessary right now because we assume for every +;; instruction that it never blocks a decoder. +;; - Figure out where the p0 and p1 reservations come from. These +;; appear not to be in the manual (e.g. why is cld "(p0+p1)*2" +;; better than "(p0|p1)*4" ???) +;; - Lots more because I'm sure this is still far from optimal :-) + +;; The ppro_idiv and ppro_fdiv automata are used to model issue +;; latencies of idiv and fdiv type insns. +(define_automaton "ppro_decoder,ppro_core,ppro_idiv,ppro_fdiv,ppro_load,ppro_store") + +;; Simple instructions of the register-register form have only one uop. +;; Load instructions are also only one uop. Store instructions decode to +;; two uops, and simple read-modify instructions also take two uops. +;; Simple instructions of the register-memory form have two to three uops. +;; Simple read-modify-write instructions have four uops. The rules for +;; the decoder are simple: +;; - an instruction with 1 uop can be decoded by any of the three +;; decoders in one cycle. +;; - an instruction with 1 to 4 uops can be decoded only by decoder 0 +;; but still in only one cycle. +;; - a complex (microcode) instruction can also only be decoded by +;; decoder 0, and this takes an unspecified number of cycles. +;; +;; The goal is to schedule such that we have a few-one-one uops sequence +;; in each cycle, to decode as many instructions per cycle as possible. +(define_cpu_unit "decoder0" "ppro_decoder") +(define_cpu_unit "decoder1" "ppro_decoder") +(define_cpu_unit "decoder2" "ppro_decoder") + +;; We first wish to find an instruction for decoder0, so exclude +;; decoder1 and decoder2 from being reserved until decoder 0 is +;; reserved. +(presence_set "decoder1" "decoder0") +(presence_set "decoder2" "decoder0") + +;; Most instructions can be decoded on any of the three decoders. +(define_reservation "decodern" "(decoder0|decoder1|decoder2)") + +;; The out-of-order core has five pipelines. During each cycle, the core +;; may dispatch zero or one uop on the port of any of the five pipelines +;; so the maximum number of dispatched uops per cycle is 5. 
In practice, +;; 3 uops per cycle is more realistic. +;; +;; Two of the five pipelines contain several execution units: +;; +;; Port 0 Port 1 Port 2 Port 3 Port 4 +;; ALU ALU LOAD SAC SDA +;; FPU JUE +;; AGU MMX +;; MMX P3FPU +;; P3FPU +;; +;; (SAC=Store Address Calculation, SDA=Store Data Unit, P3FPU = SSE unit, +;; JUE = Jump Execution Unit, AGU = Address Generation Unit) +;; +(define_cpu_unit "p0,p1" "ppro_core") +(define_cpu_unit "p2" "ppro_load") +(define_cpu_unit "p3,p4" "ppro_store") +(define_cpu_unit "idiv" "ppro_idiv") +(define_cpu_unit "fdiv" "ppro_fdiv") + +;; Only the irregular instructions have to be modeled here. A load +;; increases the latency by 2 or 3, or by nothing if the manual gives +;; a latency already. Store latencies are not accounted for. +;; +;; The simple instructions follow a very regular pattern of 1 uop per +;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store +;; on port 4 and port 3. These instructions are modelled at the bottom +;; of this file. +;; +;; For microcoded instructions we don't know how many uops are produced. +;; These instructions are the "complex" ones in the Intel manuals. All +;; we _do_ know is that they typically produce four or more uops, so +;; they can only be decoded on decoder0. Modelling their latencies +;; doesn't make sense because we don't know how these instructions are +;; executed in the core. So we just model that they can only be decoded +;; on decoder 0, and say that it takes a little while before the result +;; is available. +(define_insn_reservation "ppro_complex_insn" 6 + (and (eq_attr "cpu" "pentiumpro,generic32") + (eq_attr "type" "other,multi,call,callv,str")) + "decoder0") + +;; imov with memory operands does not use the integer units. +(define_insn_reservation "ppro_imov" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "imov"))) + "decodern,(p0|p1)") + +(define_insn_reservation "ppro_imov_load" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (eq_attr "type" "imov"))) + "decodern,p2") + +(define_insn_reservation "ppro_imov_store" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (eq_attr "type" "imov"))) + "decoder0,p4+p3") + +;; imovx always decodes to one uop, and also doesn't use the integer +;; units if it has memory operands. +(define_insn_reservation "ppro_imovx" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "imovx"))) + "decodern,(p0|p1)") + +(define_insn_reservation "ppro_imovx_load" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (eq_attr "type" "imovx"))) + "decodern,p2") + +;; lea executes on port 0 with latency 1 and throughput 1. +(define_insn_reservation "ppro_lea" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "lea"))) + "decodern,p0") + +;; Shift and rotate execute on port 0 with latency and throughput 1. +;; The load and store units need to be reserved when memory operands +;; are involved; the reservations follow the decoder sketch below.
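Before the per-insn reservations, a toy C model of the decode-grouping rules described earlier may help: per cycle, decoder 0 accepts an instruction of up to four uops, while decoders 1 and 2 each accept only single-uop instructions (microcoded instructions are ignored here). The function is an invented illustration, not GCC code:

#include <stdio.h>

/* Greedy model of P6 decoding: each cycle, decoder 0 takes one insn of
   up to 4 uops; decoders 1 and 2 each take only single-uop insns.
   Returns the number of cycles needed to decode the given uop counts.  */
static int decode_cycles (const int *uops, int n)
{
  int cycles = 0, i = 0;
  while (i < n)
    {
      cycles++;
      i++;                        /* decoder 0: up to 4 uops */
      int simple;
      for (simple = 0; simple < 2 && i < n && uops[i] == 1; simple++)
        i++;                      /* decoders 1 and 2: 1 uop each */
    }
  return cycles;
}

int main (void)
{
  int good[] = { 4, 1, 1, 4, 1, 1 };  /* the ideal few-one-one pattern */
  int bad[]  = { 4, 4, 4, 1, 1, 1 };  /* multi-uop insns back to back */
  printf ("%d vs %d cycles\n",
          decode_cycles (good, 6), decode_cycles (bad, 6)); /* 2 vs 4 */
  return 0;
}

The ideal few-one-one pattern decodes three instructions per cycle, while back-to-back multi-uop instructions serialize on decoder 0.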
+(define_insn_reservation "ppro_shift_rotate" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) + "decodern,p0") + +(define_insn_reservation "ppro_shift_rotate_mem" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "!none") + (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) + "decoder0,p2+p0,p4+p3") + +(define_insn_reservation "ppro_cld" 2 + (and (eq_attr "cpu" "pentiumpro,generic32") + (eq_attr "type" "cld")) + "decoder0,(p0+p1)*2") + +;; The P6 has a sophisticated branch prediction mechanism to minimize +;; latencies due to branching. In particular, it has a fast way to +;; execute branches that are taken multiple times (such as in loops). +;; Branches not taken suffer no penalty, and correctly predicted +;; branches cost only one fetch cycle. Mispredicted branches are very +;; costly: typically 15 cycles and possibly as many as 26 cycles. +;; +;; Unfortunately all this makes it quite difficult to properly model +;; the latencies for the compiler. Here I've made the choice to be +;; optimistic and assume branches are often predicted correctly, so +;; they have latency 1, and the decoders are not blocked. +;; +;; In addition, the model assumes a branch always decodes to only 1 uop, +;; which is not exactly true because there are a few instructions that +;; decode to 2 uops or microcode. But this probably gives the best +;; results because we can assume these instructions can decode on all +;; decoders. +(define_insn_reservation "ppro_branch" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "ibr"))) + "decodern,p1") + +;; ??? Indirect branches probably have worse latency than this. +(define_insn_reservation "ppro_indirect_branch" 6 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "!none") + (eq_attr "type" "ibr"))) + "decoder0,p2+p1") + +(define_insn_reservation "ppro_leave" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (eq_attr "type" "leave")) + "decoder0,p2+(p0|p1),(p0|p1)") + +;; imul has throughput one, but latency 4, and can only execute on port 0. +(define_insn_reservation "ppro_imul" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "imul"))) + "decodern,p0") + +(define_insn_reservation "ppro_imul_mem" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "!none") + (eq_attr "type" "imul"))) + "decoder0,p2+p0") + +;; div and idiv are very similar, so we model them the same. +;; QI, HI, and SI have issue latency 12, 21, and 37, respectively. +;; These issue latencies are modelled via the ppro_div automaton. 
+(define_insn_reservation "ppro_idiv_QI" 19 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "QI") + (eq_attr "type" "idiv")))) + "decoder0,(p0+idiv)*2,(p0|p1)+idiv,idiv*9") + +(define_insn_reservation "ppro_idiv_QI_load" 19 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "QI") + (eq_attr "type" "idiv")))) + "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*9") + +(define_insn_reservation "ppro_idiv_HI" 23 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "HI") + (eq_attr "type" "idiv")))) + "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*17") + +(define_insn_reservation "ppro_idiv_HI_load" 23 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "HI") + (eq_attr "type" "idiv")))) + "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*18") + +(define_insn_reservation "ppro_idiv_SI" 39 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SI") + (eq_attr "type" "idiv")))) + "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*33") + +(define_insn_reservation "ppro_idiv_SI_load" 39 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SI") + (eq_attr "type" "idiv")))) + "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*34") + +;; Floating point operations always execute on port 0. +;; ??? where do these latencies come from? fadd has latency 3 and +;; has throughput "1/cycle (align with FADD)". What do they +;; mean and how can we model that? +(define_insn_reservation "ppro_fop" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none,unknown") + (eq_attr "type" "fop"))) + "decodern,p0") + +(define_insn_reservation "ppro_fop_load" 5 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (eq_attr "type" "fop"))) + "decoder0,p2+p0,p0") + +(define_insn_reservation "ppro_fop_store" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (eq_attr "type" "fop"))) + "decoder0,p0,p0,p0+p4+p3") + +(define_insn_reservation "ppro_fop_both" 5 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "both") + (eq_attr "type" "fop"))) + "decoder0,p2+p0,p0+p4+p3") + +(define_insn_reservation "ppro_fsgn" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (eq_attr "type" "fsgn")) + "decodern,p0") + +(define_insn_reservation "ppro_fistp" 5 + (and (eq_attr "cpu" "pentiumpro,generic32") + (eq_attr "type" "fistp")) + "decoder0,p0*2,p4+p3") + +(define_insn_reservation "ppro_fcmov" 2 + (and (eq_attr "cpu" "pentiumpro,generic32") + (eq_attr "type" "fcmov")) + "decoder0,p0*2") + +(define_insn_reservation "ppro_fcmp" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "fcmp"))) + "decodern,p0") + +(define_insn_reservation "ppro_fcmp_load" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (eq_attr "type" "fcmp"))) + "decoder0,p2+p0") + +(define_insn_reservation "ppro_fmov" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "fmov"))) + "decodern,p0") + +(define_insn_reservation "ppro_fmov_load" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "!XF") + (eq_attr "type" "fmov")))) + "decodern,p2") + +(define_insn_reservation "ppro_fmov_XF_load" 3 + (and (eq_attr "cpu" 
"pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "XF") + (eq_attr "type" "fmov")))) + "decoder0,(p2+p0)*2") + +(define_insn_reservation "ppro_fmov_store" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (and (eq_attr "mode" "!XF") + (eq_attr "type" "fmov")))) + "decodern,p0") + +(define_insn_reservation "ppro_fmov_XF_store" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (and (eq_attr "mode" "XF") + (eq_attr "type" "fmov")))) + "decoder0,(p0+p4),(p0+p3)") + +;; fmul executes on port 0 with latency 5. It has issue latency 2, +;; but we don't model this. +(define_insn_reservation "ppro_fmul" 5 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "fmul"))) + "decoder0,p0*2") + +(define_insn_reservation "ppro_fmul_load" 6 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (eq_attr "type" "fmul"))) + "decoder0,p2+p0,p0") + +;; fdiv latencies depend on the mode of the operands. XFmode gives +;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18. +;; Division by a power of 2 takes only 9 cycles, but we cannot model +;; that. Throughput is equal to latency - 1, which we model using the +;; ppro_div automaton. +(define_insn_reservation "ppro_fdiv_SF" 18 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "fdiv,fpspc")))) + "decodern,p0+fdiv,fdiv*16") + +(define_insn_reservation "ppro_fdiv_SF_load" 19 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SF") + (eq_attr "type" "fdiv,fpspc")))) + "decoder0,p2+p0+fdiv,fdiv*16") + +(define_insn_reservation "ppro_fdiv_DF" 32 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "DF") + (eq_attr "type" "fdiv,fpspc")))) + "decodern,p0+fdiv,fdiv*30") + +(define_insn_reservation "ppro_fdiv_DF_load" 33 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "DF") + (eq_attr "type" "fdiv,fpspc")))) + "decoder0,p2+p0+fdiv,fdiv*30") + +(define_insn_reservation "ppro_fdiv_XF" 38 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "XF") + (eq_attr "type" "fdiv,fpspc")))) + "decodern,p0+fdiv,fdiv*36") + +(define_insn_reservation "ppro_fdiv_XF_load" 39 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "XF") + (eq_attr "type" "fdiv,fpspc")))) + "decoder0,p2+p0+fdiv,fdiv*36") + +;; MMX instructions can execute on either port 0 or port 1 with a +;; throughput of 1/cycle. +;; on port 0: - ALU (latency 1) +;; - Multiplier Unit (latency 3) +;; on port 1: - ALU (latency 1) +;; - Shift Unit (latency 1) +;; +;; MMX instructions are either of the type reg-reg, or read-modify, and +;; except for mmxshft and mmxmul they can execute on port 0 or port 1, +;; so they behave as "simple" instructions that need no special modelling. +;; We only have to model mmxshft and mmxmul. 
+(define_insn_reservation "ppro_mmx_shft" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "mmxshft"))) + "decodern,p1") + +(define_insn_reservation "ppro_mmx_shft_load" 2 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "mmxshft"))) + "decoder0,p2+p1") + +(define_insn_reservation "ppro_mmx_mul" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "mmxmul"))) + "decodern,p0") + +(define_insn_reservation "ppro_mmx_mul_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (eq_attr "type" "mmxmul"))) + "decoder0,p2+p0") + +(define_insn_reservation "ppro_sse_mmxcvt" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "mode" "DI") + (eq_attr "type" "mmxcvt"))) + "decodern,p1") + +;; FIXME: These are Pentium III only, but we cannot tell here if +;; we're generating code for PentiumPro/Pentium II or Pentium III +;; (define_insn_reservation "ppro_sse_mmxshft" 2 +;; (and (eq_attr "cpu" "pentiumpro,generic32") +;; (and (eq_attr "mode" "DI") +;; (eq_attr "type" "mmxshft"))) +;; "decodern,p0") + +;; SSE is very complicated, and takes a bit more effort. +;; ??? I assumed that all SSE instructions decode on decoder0, +;; but is this correct? + +;; The sfence instruction. +(define_insn_reservation "ppro_sse_sfence" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "unknown") + (eq_attr "type" "sse"))) + "decoder0,p4+p3") + +;; FIXME: This reservation is all wrong when we're scheduling sqrtss. +(define_insn_reservation "ppro_sse_SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "mode" "SF") + (eq_attr "type" "sse"))) + "decodern,p0") + +(define_insn_reservation "ppro_sse_add_SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "sseadd")))) + "decodern,p1") + +(define_insn_reservation "ppro_sse_add_SF_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SF") + (eq_attr "type" "sseadd")))) + "decoder0,p2+p1") + +(define_insn_reservation "ppro_sse_cmp_SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssecmp")))) + "decoder0,p1") + +(define_insn_reservation "ppro_sse_cmp_SF_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssecmp")))) + "decoder0,p2+p1") + +(define_insn_reservation "ppro_sse_comi_SF" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssecomi")))) + "decodern,p0") + +(define_insn_reservation "ppro_sse_comi_SF_load" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssecomi")))) + "decoder0,p2+p0") + +(define_insn_reservation "ppro_sse_mul_SF" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssemul")))) + "decodern,p0") + +(define_insn_reservation "ppro_sse_mul_SF_load" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssemul")))) + "decoder0,p2+p0") + +;; FIXME: ssediv doesn't close p0 for 17 cycles, surely??? 
+(define_insn_reservation "ppro_sse_div_SF" 18 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssediv")))) + "decoder0,p0*17") + +(define_insn_reservation "ppro_sse_div_SF_load" 18 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssediv")))) + "decoder0,(p2+p0),p0*16") + +(define_insn_reservation "ppro_sse_icvt_SF" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "mode" "SF") + (eq_attr "type" "sseicvt"))) + "decoder0,(p2+p1)*2") + +(define_insn_reservation "ppro_sse_icvt_SI" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "mode" "SI") + (eq_attr "type" "sseicvt"))) + "decoder0,(p2+p1)") + +(define_insn_reservation "ppro_sse_mov_SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssemov")))) + "decoder0,(p0|p1)") + +(define_insn_reservation "ppro_sse_mov_SF_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssemov")))) + "decoder0,p2+(p0|p1)") + +(define_insn_reservation "ppro_sse_mov_SF_store" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (and (eq_attr "mode" "SF") + (eq_attr "type" "ssemov")))) + "decoder0,p4+p3") + +(define_insn_reservation "ppro_sse_V4SF" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "sse"))) + "decoder0,p1*2") + +(define_insn_reservation "ppro_sse_add_V4SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "sseadd")))) + "decoder0,p1*2") + +(define_insn_reservation "ppro_sse_add_V4SF_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "sseadd")))) + "decoder0,(p2+p1)*2") + +(define_insn_reservation "ppro_sse_cmp_V4SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssecmp")))) + "decoder0,p1*2") + +(define_insn_reservation "ppro_sse_cmp_V4SF_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssecmp")))) + "decoder0,(p2+p1)*2") + +(define_insn_reservation "ppro_sse_cvt_V4SF" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssecvt")))) + "decoder0,p1*2") + +(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "!none,unknown") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssecmp")))) + "decoder0,p1,p4+p3") + +(define_insn_reservation "ppro_sse_mul_V4SF" 5 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssemul")))) + "decoder0,p0*2") + +(define_insn_reservation "ppro_sse_mul_V4SF_load" 5 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssemul")))) + "decoder0,(p2+p0)*2") + +;; FIXME: p0 really closed this long??? 
+(define_insn_reservation "ppro_sse_div_V4SF" 48 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssediv")))) + "decoder0,p0*34") + +(define_insn_reservation "ppro_sse_div_V4SF_load" 48 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssediv")))) + "decoder0,(p2+p0)*2,p0*32") + +(define_insn_reservation "ppro_sse_log_V4SF" 2 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "sselog,sselog1")))) + "decodern,p1") + +(define_insn_reservation "ppro_sse_log_V4SF_load" 2 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "sselog,sselog1")))) + "decoder0,(p2+p1)") + +(define_insn_reservation "ppro_sse_mov_V4SF" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssemov")))) + "decoder0,(p0|p1)*2") + +(define_insn_reservation "ppro_sse_mov_V4SF_load" 2 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssemov")))) + "decoder0,p2*2") + +(define_insn_reservation "ppro_sse_mov_V4SF_store" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (and (eq_attr "mode" "V4SF") + (eq_attr "type" "ssemov")))) + "decoder0,(p4+p3)*2") + +;; All other instructions are modelled as simple instructions. +;; We have already modelled all i387 floating point instructions, so all +;; other instructions execute on either port 0 or port 1. This includes +;; the ALU units, and the MMX units. +;; +;; reg-reg instructions produce 1 uop so they can be decoded on any of +;; the three decoders. +(define_insn_reservation "ppro_insn" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "none,unknown") + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + "decodern,(p0|p1)") + +;; read-modify and register-memory instructions have 2 or three uops, +;; so they have to be decoded on decoder0. +(define_insn_reservation "ppro_insn_load" 3 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "load") + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + "decoder0,p2+(p0|p1)") + +(define_insn_reservation "ppro_insn_store" 1 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "store") + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + "decoder0,(p0|p1),p4+p3") + +;; read-modify-store instructions produce 4 uops so they have to be +;; decoded on decoder0 as well. +(define_insn_reservation "ppro_insn_both" 4 + (and (eq_attr "cpu" "pentiumpro,generic32") + (and (eq_attr "memory" "both") + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + "decoder0,p2+(p0|p1),p4+p3") diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/predicates.md b/gcc-4.2.1-5666.3/gcc/config/i386/predicates.md new file mode 100644 index 000000000..f988d11e4 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/predicates.md @@ -0,0 +1,1037 @@ +;; Predicate definitions for IA-32 and x86-64. 
+;; Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;; Return nonzero if OP is either an i387 or an SSE fp register.
+(define_predicate "any_fp_register_operand"
+  (and (match_code "reg")
+       (match_test "ANY_FP_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is an i387 fp register.
+(define_predicate "fp_register_operand"
+  (and (match_code "reg")
+       (match_test "FP_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is a non-fp register_operand.
+(define_predicate "register_and_not_any_fp_reg_operand"
+  (and (match_code "reg")
+       (not (match_test "ANY_FP_REGNO_P (REGNO (op))"))))
+
+;; Return nonzero if OP is a register operand other than an i387 fp register.
+(define_predicate "register_and_not_fp_reg_operand"
+  (and (match_code "reg")
+       (not (match_test "FP_REGNO_P (REGNO (op))"))))
+
+;; True if the operand is an MMX register.
+(define_predicate "mmx_reg_operand"
+  (and (match_code "reg")
+       (match_test "MMX_REGNO_P (REGNO (op))")))
+
+;; True if the operand is a Q_REGS class register.
+(define_predicate "q_regs_operand"
+  (match_operand 0 "register_operand")
+{
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+  return ANY_QI_REG_P (op);
+})
+
+;; Return true if op is a NON_Q_REGS class register.
+(define_predicate "non_q_regs_operand"
+  (match_operand 0 "register_operand")
+{
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+  return NON_QI_REG_P (op);
+})
+
+;; Match an SI or HImode register for a zero_extract.
+(define_special_predicate "ext_register_operand"
+  (match_operand 0 "register_operand")
+{
+  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
+      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
+    return 0;
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  /* Be careful to accept only registers having upper parts.  */
+  return REGNO (op) > LAST_VIRTUAL_REGISTER || REGNO (op) < 4;
+})
+
+;; Return true if op is the AX register.
+(define_predicate "ax_reg_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == 0")))
+
+;; Return true if op is the flags register.
+(define_predicate "flags_reg_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == FLAGS_REG")))
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; Return true if op is not the xmm0 register.
+(define_predicate "reg_not_xmm0_operand"
+  (and (match_operand 0 "register_operand")
+       (match_test "GET_CODE (op) != REG
+                    || REGNO (op) != FIRST_SSE_REG")))
+
+;; As above, but allow nonimmediate operands.
+(define_predicate "nonimm_not_xmm0_operand"
+  (and (match_operand 0 "nonimmediate_operand")
+       (match_test "GET_CODE (op) != REG
+                    || REGNO (op) != FIRST_SSE_REG")))
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Return 1 if VALUE can be stored in a sign-extended immediate field.
+(define_predicate "x86_64_immediate_operand" + (match_code "const_int,symbol_ref,label_ref,const") +{ + if (!TARGET_64BIT) + return immediate_operand (op, mode); + + switch (GET_CODE (op)) + { + case CONST_INT: + /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known + to be at least 32 and this all acceptable constants are + represented as CONST_INT. */ + if (HOST_BITS_PER_WIDE_INT == 32) + return 1; + else + { + HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (op), DImode); + return trunc_int_for_mode (val, SImode) == val; + } + break; + + case SYMBOL_REF: + /* For certain code models, the symbolic references are known to fit. + in CM_SMALL_PIC model we know it fits if it is local to the shared + library. Don't count TLS SYMBOL_REFs here, since they should fit + only if inside of UNSPEC handled below. */ + /* TLS symbols are not constant. */ + if (SYMBOL_REF_TLS_MODEL (op)) + return false; + return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL + || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op))); + + case LABEL_REF: + /* For certain code models, the code is near as well. */ + return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM + || ix86_cmodel == CM_KERNEL); + + case CONST: + /* We also may accept the offsetted memory references in certain + special cases. */ + if (GET_CODE (XEXP (op, 0)) == UNSPEC) + switch (XINT (XEXP (op, 0), 1)) + { + case UNSPEC_GOTPCREL: + case UNSPEC_DTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_NTPOFF: + return 1; + default: + break; + } + + if (GET_CODE (XEXP (op, 0)) == PLUS) + { + rtx op1 = XEXP (XEXP (op, 0), 0); + rtx op2 = XEXP (XEXP (op, 0), 1); + HOST_WIDE_INT offset; + + if (ix86_cmodel == CM_LARGE) + return 0; + if (GET_CODE (op2) != CONST_INT) + return 0; + offset = trunc_int_for_mode (INTVAL (op2), DImode); + switch (GET_CODE (op1)) + { + case SYMBOL_REF: + /* TLS symbols are not constant. */ + if (SYMBOL_REF_TLS_MODEL (op1)) + return 0; + /* For CM_SMALL assume that latest object is 16MB before + end of 31bits boundary. We may also accept pretty + large negative constants knowing that all objects are + in the positive half of address space. */ + if ((ix86_cmodel == CM_SMALL + || (ix86_cmodel == CM_MEDIUM + && !SYMBOL_REF_FAR_ADDR_P (op1))) + && offset < 16*1024*1024 + && trunc_int_for_mode (offset, SImode) == offset) + return 1; + /* For CM_KERNEL we know that all object resist in the + negative half of 32bits address space. We may not + accept negative offsets, since they may be just off + and we may accept pretty large positive ones. */ + if (ix86_cmodel == CM_KERNEL + && offset > 0 + && trunc_int_for_mode (offset, SImode) == offset) + return 1; + break; + + case LABEL_REF: + /* These conditions are similar to SYMBOL_REF ones, just the + constraints for code models differ. */ + if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) + && offset < 16*1024*1024 + && trunc_int_for_mode (offset, SImode) == offset) + return 1; + if (ix86_cmodel == CM_KERNEL + && offset > 0 + && trunc_int_for_mode (offset, SImode) == offset) + return 1; + break; + + case UNSPEC: + switch (XINT (op1, 1)) + { + case UNSPEC_DTPOFF: + case UNSPEC_NTPOFF: + if (offset > 0 + && trunc_int_for_mode (offset, SImode) == offset) + return 1; + } + break; + + default: + break; + } + } + break; + + default: + gcc_unreachable (); + } + + return 0; +}) + +;; Return 1 if VALUE can be stored in the zero extended immediate field. 
+(define_predicate "x86_64_zext_immediate_operand" + (match_code "const_double,const_int,symbol_ref,label_ref,const") +{ + switch (GET_CODE (op)) + { + case CONST_DOUBLE: + if (HOST_BITS_PER_WIDE_INT == 32) + return (GET_MODE (op) == VOIDmode && !CONST_DOUBLE_HIGH (op)); + else + return 0; + + case CONST_INT: + if (HOST_BITS_PER_WIDE_INT == 32) + return INTVAL (op) >= 0; + else + return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff); + + case SYMBOL_REF: + /* For certain code models, the symbolic references are known to fit. */ + /* TLS symbols are not constant. */ + if (SYMBOL_REF_TLS_MODEL (op)) + return false; + return (ix86_cmodel == CM_SMALL + || (ix86_cmodel == CM_MEDIUM + && !SYMBOL_REF_FAR_ADDR_P (op))); + + case LABEL_REF: + /* For certain code models, the code is near as well. */ + return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM; + + case CONST: + /* We also may accept the offsetted memory references in certain + special cases. */ + if (GET_CODE (XEXP (op, 0)) == PLUS) + { + rtx op1 = XEXP (XEXP (op, 0), 0); + rtx op2 = XEXP (XEXP (op, 0), 1); + + if (ix86_cmodel == CM_LARGE) + return 0; + switch (GET_CODE (op1)) + { + case SYMBOL_REF: + /* TLS symbols are not constant. */ + if (SYMBOL_REF_TLS_MODEL (op1)) + return 0; + /* For small code model we may accept pretty large positive + offsets, since one bit is available for free. Negative + offsets are limited by the size of NULL pointer area + specified by the ABI. */ + if ((ix86_cmodel == CM_SMALL + || (ix86_cmodel == CM_MEDIUM + && !SYMBOL_REF_FAR_ADDR_P (op1))) + && GET_CODE (op2) == CONST_INT + && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 + && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2)) + return 1; + /* ??? For the kernel, we may accept adjustment of + -0x10000000, since we know that it will just convert + negative address space to positive, but perhaps this + is not worthwhile. */ + break; + + case LABEL_REF: + /* These conditions are similar to SYMBOL_REF ones, just the + constraints for code models differ. */ + if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) + && GET_CODE (op2) == CONST_INT + && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 + && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2)) + return 1; + break; + + default: + return 0; + } + } + break; + + default: + gcc_unreachable (); + } + return 0; +}) + +;; Return nonzero if OP is general operand representable on x86_64. +(define_predicate "x86_64_general_operand" + (if_then_else (match_test "TARGET_64BIT") + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "x86_64_immediate_operand")) + (match_operand 0 "general_operand"))) + +;; Return nonzero if OP is general operand representable on x86_64 +;; as either sign extended or zero extended constant. +(define_predicate "x86_64_szext_general_operand" + (if_then_else (match_test "TARGET_64BIT") + (ior (match_operand 0 "nonimmediate_operand") + (ior (match_operand 0 "x86_64_immediate_operand") + (match_operand 0 "x86_64_zext_immediate_operand"))) + (match_operand 0 "general_operand"))) + +;; Return nonzero if OP is nonmemory operand representable on x86_64. +(define_predicate "x86_64_nonmemory_operand" + (if_then_else (match_test "TARGET_64BIT") + (ior (match_operand 0 "register_operand") + (match_operand 0 "x86_64_immediate_operand")) + (match_operand 0 "nonmemory_operand"))) + +;; Return nonzero if OP is nonmemory operand representable on x86_64. 
+(define_predicate "x86_64_szext_nonmemory_operand" + (if_then_else (match_test "TARGET_64BIT") + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "x86_64_immediate_operand") + (match_operand 0 "x86_64_zext_immediate_operand"))) + (match_operand 0 "nonmemory_operand"))) + +;; Return true when operand is PIC expression that can be computed by lea +;; operation. +(define_predicate "pic_32bit_operand" + (match_code "const,symbol_ref,label_ref") +{ + if (!flag_pic) + return 0; + /* Rule out relocations that translate into 64bit constants. */ + if (TARGET_64BIT && GET_CODE (op) == CONST) + { + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT) + op = XEXP (op, 0); + if (GET_CODE (op) == UNSPEC + && (XINT (op, 1) == UNSPEC_GOTOFF + || XINT (op, 1) == UNSPEC_GOT)) + return 0; + } + return symbolic_operand (op, mode); +}) + + +;; Return nonzero if OP is nonmemory operand acceptable by movabs patterns. +(define_predicate "x86_64_movabs_operand" + (if_then_else (match_test "!TARGET_64BIT || !flag_pic") + (match_operand 0 "nonmemory_operand") + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_double_operand") + (match_test "GET_MODE_SIZE (mode) <= 8"))))) + +;; Returns nonzero if OP is either a symbol reference or a sum of a symbol +;; reference and a constant. +(define_predicate "symbolic_operand" + (match_code "symbol_ref,label_ref,const") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + case LABEL_REF: + return 1; + + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) == SYMBOL_REF + || GET_CODE (op) == LABEL_REF + || (GET_CODE (op) == UNSPEC + && (XINT (op, 1) == UNSPEC_GOT + || XINT (op, 1) == UNSPEC_GOTOFF + || XINT (op, 1) == UNSPEC_GOTPCREL))) + return 1; + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return 0; + + op = XEXP (op, 0); + if (GET_CODE (op) == SYMBOL_REF + || GET_CODE (op) == LABEL_REF) + return 1; + /* Only @GOTOFF gets offsets. */ + if (GET_CODE (op) != UNSPEC + || XINT (op, 1) != UNSPEC_GOTOFF) + return 0; + + op = XVECEXP (op, 0, 0); + if (GET_CODE (op) == SYMBOL_REF + || GET_CODE (op) == LABEL_REF) + return 1; + return 0; + + default: + gcc_unreachable (); + } +}) + +;; Return true if the operand contains a @GOT or @GOTOFF reference. +(define_predicate "pic_symbolic_operand" + (match_code "const") +{ + op = XEXP (op, 0); + if (TARGET_64BIT) + { + if (GET_CODE (op) == UNSPEC + && XINT (op, 1) == UNSPEC_GOTPCREL) + return 1; + if (GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == UNSPEC + && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL) + return 1; + } + else + { + if (GET_CODE (op) == UNSPEC) + return 1; + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == UNSPEC) + return 1; + } + return 0; +}) + +;; Return true if OP is a symbolic operand that resolves locally. 
+(define_predicate "local_symbolic_operand" + (match_code "const,label_ref,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) == LABEL_REF) + return 1; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + if (SYMBOL_REF_TLS_MODEL (op) != 0) + return 0; + +/* APPLE LOCAL begin fix-and-continue 6358507 */ + if (SYMBOL_REF_LOCAL_P (op)) + { +#if TARGET_MACHO + if (!indirect_data (op) + || machopic_data_defined_p (op)) +#endif + return 1; + } +/* APPLE LOCAL end fix-and-continue 6358507 */ + + /* There is, however, a not insubstantial body of code in the rest of + the compiler that assumes it can just stick the results of + ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */ + /* ??? This is a hack. Should update the body of the compiler to + always create a DECL an invoke targetm.encode_section_info. */ + if (strncmp (XSTR (op, 0), internal_label_prefix, + internal_label_prefix_len) == 0) + return 1; + + return 0; +}) + +;; Test for various thread-local symbols. +(define_predicate "tls_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) + +(define_predicate "tls_modbase_operand" + (and (match_code "symbol_ref") + (match_test "op == ix86_tls_module_base ()"))) + +(define_predicate "tp_or_register_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_TP")))) + +;; Test for a pc-relative call operand +(define_predicate "constant_call_address_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "local_symbolic_operand"))) + +;; True for any non-virtual or eliminable register. Used in places where +;; instantiation of such a register may cause the pattern to not be recognized. +(define_predicate "register_no_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return !(op == arg_pointer_rtx + || op == frame_pointer_rtx + || (REGNO (op) >= FIRST_PSEUDO_REGISTER + && REGNO (op) <= LAST_VIRTUAL_REGISTER)); +}) + +;; Similarly, but include the stack pointer. This is used to prevent esp +;; from being used as an index reg. +(define_predicate "index_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (reload_in_progress || reload_completed) + return REG_OK_FOR_INDEX_STRICT_P (op); + else + return REG_OK_FOR_INDEX_NONSTRICT_P (op); +}) + +;; Return false if this is any eliminable register. Otherwise general_operand. +(define_predicate "general_no_elim_operand" + (if_then_else (match_code "reg,subreg") + (match_operand 0 "register_no_elim_operand") + (match_operand 0 "general_operand"))) + +;; Return false if this is any eliminable register. Otherwise +;; register_operand or a constant. +(define_predicate "nonmemory_no_elim_operand" + (ior (match_operand 0 "register_no_elim_operand") + (match_operand 0 "immediate_operand"))) + +;; Test for a valid operand for a call instruction. +(define_predicate "call_insn_operand" + (ior (match_operand 0 "constant_call_address_operand") + (ior (match_operand 0 "register_no_elim_operand") + (match_operand 0 "memory_operand")))) + +;; Similarly, but for tail calls, in which we cannot allow memory references. +(define_predicate "sibcall_insn_operand" + (ior (match_operand 0 "constant_call_address_operand") + (match_operand 0 "register_no_elim_operand"))) + +;; Match exactly zero. 
+(define_predicate "const0_operand" + (match_code "const_int,const_double,const_vector") +{ + if (mode == VOIDmode) + mode = GET_MODE (op); + return op == CONST0_RTX (mode); +}) + +;; Match exactly one. +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == const1_rtx"))) + +;; Match exactly eight. +(define_predicate "const8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 8"))) + +;; Match 2, 4, or 8. Used for leal multiplicands. +(define_predicate "const248_operand" + (match_code "const_int") +{ + HOST_WIDE_INT i = INTVAL (op); + return i == 2 || i == 4 || i == 8; +}) + +;; Match 0 or 1. +(define_predicate "const_0_to_1_operand" + (and (match_code "const_int") + (match_test "op == const0_rtx || op == const1_rtx"))) + +;; Match 0 to 3. +(define_predicate "const_0_to_3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3"))) + +;; Match 0 to 7. +(define_predicate "const_0_to_7_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7"))) + +;; Match 0 to 15. +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15"))) + +;; Match 0 to 63. +(define_predicate "const_0_to_63_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63"))) + +;; Match 0 to 255. +(define_predicate "const_0_to_255_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 255"))) + +;; Match (0 to 255) * 8 +(define_predicate "const_0_to_255_mul_8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT val = INTVAL (op); + return val <= 255*8 && val % 8 == 0; +}) + +;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand +;; for shift & compare patterns, as shifting by 0 does not change flags). +(define_predicate "const_1_to_31_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 31"))) + +;; Match 2 or 3. +(define_predicate "const_2_to_3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 3"))) + +;; Match 4 to 7. +(define_predicate "const_4_to_7_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 4 && INTVAL (op) <= 7"))) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; Match exactly one bit in 2-bit mask. +(define_predicate "const_pow2_1_to_2_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) +;; APPLE LOCAL end 5612787 mainline sse4 + +;; Match exactly one bit in 4-bit mask. +(define_predicate "const_pow2_1_to_8_operand" + (match_code "const_int") +{ + unsigned int log = exact_log2 (INTVAL (op)); + return log <= 3; +}) + +;; Match exactly one bit in 8-bit mask. +(define_predicate "const_pow2_1_to_128_operand" + (match_code "const_int") +{ + unsigned int log = exact_log2 (INTVAL (op)); + return log <= 7; +}) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; Match exactly one bit in 16-bit mask. +(define_predicate "const_pow2_1_to_32768_operand" + (match_code "const_int") +{ + unsigned int log = exact_log2 (INTVAL (op)); + return log <= 15; +}) +;; APPLE LOCAL end 5612787 mainline sse4 + +;; True if this is a constant appropriate for an increment or decrement. +(define_predicate "incdec_operand" + (match_code "const_int") +{ + /* On Pentium4, the inc and dec operations causes extra dependency on flag + registers, since carry flag is not set. 
+;; True if this is a constant appropriate for an increment or decrement.
+(define_predicate "incdec_operand"
+  (match_code "const_int")
+{
+  /* On the Pentium 4, the inc and dec operations cause an extra
+     dependency on the flags register, since the carry flag is not
+     set.  */
+  if (!TARGET_USE_INCDEC && !optimize_size)
+    return 0;
+  return op == const1_rtx || op == constm1_rtx;
+})
+
+;; True for registers, or 1 or -1.  Used to optimize double-word shifts.
+(define_predicate "reg_or_pm1_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "const_int")
+            (match_test "op == const1_rtx || op == constm1_rtx"))))
+
+;; True if OP is acceptable as an operand of the DImode shift expander.
+(define_predicate "shiftdi_operand"
+  (if_then_else (match_test "TARGET_64BIT")
+    (match_operand 0 "nonimmediate_operand")
+    (match_operand 0 "register_operand")))
+
+(define_predicate "ashldi_input_operand"
+  (if_then_else (match_test "TARGET_64BIT")
+    (match_operand 0 "nonimmediate_operand")
+    (match_operand 0 "reg_or_pm1_operand")))
+
+;; Return true if OP is a vector load from the constant pool with just
+;; the first element nonzero.
+(define_predicate "zero_extended_scalar_load_operand"
+  (match_code "mem")
+{
+  unsigned n_elts;
+  op = maybe_get_pool_constant (op);
+  if (!op)
+    return 0;
+  if (GET_CODE (op) != CONST_VECTOR)
+    return 0;
+  n_elts =
+    (GET_MODE_SIZE (GET_MODE (op)) /
+     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
+  for (n_elts--; n_elts > 0; n_elts--)
+    {
+      rtx elt = CONST_VECTOR_ELT (op, n_elts);
+      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+        return 0;
+    }
+  return 1;
+})
+
+;; Return true if the operand is a vector constant that is all ones.
+(define_predicate "vector_all_ones_operand"
+  (match_code "const_vector")
+{
+  int nunits = GET_MODE_NUNITS (mode);
+
+  if (GET_CODE (op) == CONST_VECTOR
+      && CONST_VECTOR_NUNITS (op) == nunits)
+    {
+      int i;
+      for (i = 0; i < nunits; ++i)
+        {
+          rtx x = CONST_VECTOR_ELT (op, i);
+          if (x != constm1_rtx)
+            return 0;
+        }
+      return 1;
+    }
+
+  return 0;
+})
+
+;; Return 1 when OP is an operand acceptable for a standard SSE move.
+(define_predicate "vector_move_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_operand 0 "const0_operand")))
+
+;; Return 1 when OP is a nonimmediate operand or a standard SSE constant.
+(define_predicate "nonimmediate_or_sse_const_operand"
+  (match_operand 0 "general_operand")
+{
+  if (nonimmediate_operand (op, mode))
+    return 1;
+  if (standard_sse_constant_p (op) > 0)
+    return 1;
+  return 0;
+})
+
+;; APPLE LOCAL begin mainline
+/* MERGE FIXME was this replaced by reg_or_0_operand below */
+;; Return true if OP is a nonimmediate or a zero.
+(define_predicate "nonimmediate_or_0_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_operand 0 "const0_operand")))
+;; APPLE LOCAL end mainline
+
+;; Return true if OP is a register or a zero.
+(define_predicate "reg_or_0_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "const0_operand")))
+
+;; Return true if op is a valid address that does not contain a
+;; segment override.
+(define_special_predicate "no_seg_address_operand"
+  (match_operand 0 "address_operand")
+{
+  struct ix86_address parts;
+  int ok;
+
+  ok = ix86_decompose_address (op, &parts);
+  gcc_assert (ok);
+  return parts.seg == SEG_DEFAULT;
+})
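The aligned_operand predicate that follows decomposes an address into base + index*scale + disp and rejects the operand when any component could break 32-bit (4-byte) alignment. Below is a hypothetical miniature of the same check in plain C; mini_address and aligned_32 are invented for illustration, and alignments are expressed in bits, matching the REGNO_POINTER_ALIGN convention used in the real predicate.

#include <stdio.h>

struct mini_address
{
  int base_align;   /* known alignment of base register, in bits; 0 = none */
  int index_align;  /* known alignment of index register, in bits; 0 = none */
  int scale;        /* address scale factor: 1, 2, 4 or 8 */
  long disp;        /* constant displacement */
};

static int aligned_32 (const struct mini_address *a)
{
  if (a->index_align && a->index_align * a->scale < 32)
    return 0;                   /* index term not known to stay aligned */
  if (a->base_align && a->base_align < 32)
    return 0;                   /* base not known to be 4-byte aligned */
  return (a->disp & 3) == 0;    /* displacement must be a multiple of 4 */
}

int main (void)
{
  struct mini_address ok  = { 32, 32, 4, 8 };
  struct mini_address bad = { 32, 0, 1, 2 };  /* disp of 2 breaks it */
  printf ("%d %d\n", aligned_32 (&ok), aligned_32 (&bad)); /* 1 0 */
  return 0;
}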
+;; Return nonzero if the rtx is known to be at least 32 bits aligned.
+(define_predicate "aligned_operand"
+  (match_operand 0 "general_operand")
+{
+  struct ix86_address parts;
+  int ok;
+
+  /* Registers and immediate operands are always "aligned".  */
+  if (GET_CODE (op) != MEM)
+    return 1;
+
+  /* All patterns using aligned_operand on memory operands end up
+     promoting the memory operand to 64 bits and thus causing a memory
+     mismatch.  */
+  if (TARGET_MEMORY_MISMATCH_STALL && !optimize_size)
+    return 0;
+
+  /* Don't even try to do any aligned optimizations with volatiles.  */
+  if (MEM_VOLATILE_P (op))
+    return 0;
+
+  if (MEM_ALIGN (op) >= 32)
+    return 1;
+
+  op = XEXP (op, 0);
+
+  /* Pushes and pops are only valid on the stack pointer.  */
+  if (GET_CODE (op) == PRE_DEC
+      || GET_CODE (op) == POST_INC)
+    return 1;
+
+  /* Decode the address.  */
+  ok = ix86_decompose_address (op, &parts);
+  gcc_assert (ok);
+
+  /* Look for some component that isn't known to be aligned.  */
+  if (parts.index)
+    {
+      if (REGNO_POINTER_ALIGN (REGNO (parts.index)) * parts.scale < 32)
+        return 0;
+    }
+  if (parts.base)
+    {
+      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
+        return 0;
+    }
+  if (parts.disp)
+    {
+      if (GET_CODE (parts.disp) != CONST_INT
+          || (INTVAL (parts.disp) & 3) != 0)
+        return 0;
+    }
+
+  /* Didn't find one -- this must be an aligned address.  */
+  return 1;
+})
+
+;; Returns 1 if OP is a memory operand with a displacement.
+(define_predicate "memory_displacement_operand"
+  (match_operand 0 "memory_operand")
+{
+  struct ix86_address parts;
+  int ok;
+
+  ok = ix86_decompose_address (XEXP (op, 0), &parts);
+  gcc_assert (ok);
+  return parts.disp != NULL_RTX;
+})
+
+;; Returns 1 if OP is a memory operand with a displacement only.
+(define_predicate "memory_displacement_only_operand"
+  (match_operand 0 "memory_operand")
+{
+  struct ix86_address parts;
+  int ok;
+
+  ok = ix86_decompose_address (XEXP (op, 0), &parts);
+  gcc_assert (ok);
+
+  if (parts.base || parts.index)
+    return 0;
+
+  return parts.disp != NULL_RTX;
+})
+
+;; Returns 1 if OP is a memory operand that cannot be represented
+;; by the modRM array.
+(define_predicate "long_memory_operand"
+  (and (match_operand 0 "memory_operand")
+       (match_test "memory_address_length (op) != 0")))
+
+;; Return 1 if OP is a comparison operator that can be issued by fcmov.
+(define_predicate "fcmov_comparison_operator"
+  (match_operand 0 "comparison_operator")
+{
+  enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+  enum rtx_code code = GET_CODE (op);
+
+  if (inmode == CCFPmode || inmode == CCFPUmode)
+    {
+      enum rtx_code second_code, bypass_code;
+      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+      if (bypass_code != UNKNOWN || second_code != UNKNOWN)
+        return 0;
+      code = ix86_fp_compare_code_to_integer (code);
+    }
+  /* The i387 supports only a limited set of condition codes.  */
+  switch (code)
+    {
+    case LTU: case GTU: case LEU: case GEU:
+      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
+        return 1;
+      return 0;
+    case ORDERED: case UNORDERED:
+    case EQ: case NE:
+      return 1;
+    default:
+      return 0;
+    }
+})
+
+;; Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns.
+;; The first set are supported directly; the second set can't be done with
+;; full IEEE support, i.e. NaNs.
+;;
+;; ??? It would seem that we have a lot of uses of this predicate that pass
+;; it the wrong mode.  We got away with this because the old function didn't
+;; check the mode at all.  Mirror that for now by calling this a special
+;; predicate.
+
+(define_special_predicate "sse_comparison_operator"
+  (match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
+
+;; Return 1 if OP is a valid comparison operator in a valid mode.
+(define_predicate "ix86_comparison_operator" + (match_operand 0 "comparison_operator") +{ + enum machine_mode inmode = GET_MODE (XEXP (op, 0)); + enum rtx_code code = GET_CODE (op); + + if (inmode == CCFPmode || inmode == CCFPUmode) + { + enum rtx_code second_code, bypass_code; + ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); + return (bypass_code == UNKNOWN && second_code == UNKNOWN); + } + switch (code) + { + case EQ: case NE: + return 1; + case LT: case GE: + if (inmode == CCmode || inmode == CCGCmode + || inmode == CCGOCmode || inmode == CCNOmode) + return 1; + return 0; + case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU: + if (inmode == CCmode) + return 1; + return 0; + case GT: case LE: + if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode) + return 1; + return 0; + default: + return 0; + } +}) + +;; Return 1 if OP is a valid comparison operator testing carry flag to be set. +(define_predicate "ix86_carry_flag_operator" + (match_code "ltu,lt,unlt,gt,ungt,le,unle,ge,unge,ltgt,uneq") +{ + enum machine_mode inmode = GET_MODE (XEXP (op, 0)); + enum rtx_code code = GET_CODE (op); + + if (GET_CODE (XEXP (op, 0)) != REG + || REGNO (XEXP (op, 0)) != FLAGS_REG + || XEXP (op, 1) != const0_rtx) + return 0; + + if (inmode == CCFPmode || inmode == CCFPUmode) + { + enum rtx_code second_code, bypass_code; + ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); + if (bypass_code != UNKNOWN || second_code != UNKNOWN) + return 0; + code = ix86_fp_compare_code_to_integer (code); + } + else if (inmode != CCmode) + return 0; + + return code == LTU; +}) + +;; Nearly general operand, but accept any const_double, since we wish +;; to be able to drop them into memory rather than have them get pulled +;; into registers. +(define_predicate "cmp_fp_expander_operand" + (ior (match_code "const_double") + (match_operand 0 "general_operand"))) + +;; Return true if this is a valid binary floating-point operation. +(define_predicate "binary_fp_operator" + (match_code "plus,minus,mult,div")) + +;; Return true if this is a multiply operation. +(define_predicate "mult_operator" + (match_code "mult")) + +;; Return true if this is a division operation. +(define_predicate "div_operator" + (match_code "div")) + +;; Return true if this is a float extend operation. +(define_predicate "float_operator" + (match_code "float")) + +;; Return true for ARITHMETIC_P. +(define_predicate "arith_or_logical_operator" + (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax,compare,minus,div, + mod,udiv,umod,ashift,rotate,ashiftrt,lshiftrt,rotatert")) + +;; Return 1 if OP is a binary operator that can be promoted to wider mode. +;; Modern CPUs have same latency for HImode and SImode multiply, +;; but 386 and 486 do HImode multiply faster. */ +(define_predicate "promotable_binary_operator" + (ior (match_code "plus,and,ior,xor,ashift") + (and (match_code "mult") + (match_test "ix86_tune > PROCESSOR_I486")))) + +;; To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0, +;; re-recognize the operand to avoid a copy_to_mode_reg that will fail. +;; +;; ??? It seems likely that this will only work because cmpsi is an +;; expander, and no actual insns use this. 
+ +(define_predicate "cmpsi_operand" + (ior (match_operand 0 "nonimmediate_operand") + (and (match_code "and") + (match_code "zero_extract" "0") + (match_code "const_int" "1") + (match_code "const_int" "01") + (match_code "const_int" "02") + (match_test "INTVAL (XEXP (XEXP (op, 0), 1)) == 8") + (match_test "INTVAL (XEXP (XEXP (op, 0), 2)) == 8") + ))) + +(define_predicate "compare_operator" + (match_code "compare")) + +(define_predicate "absneg_operator" + (match_code "abs,neg")) diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h new file mode 100644 index 000000000..2da9a7460 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h @@ -0,0 +1,836 @@ +/* APPLE LOCAL file 5612787 mainline sse4 */ +/* Copyright (C) 2007 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 10.0. */ + +#ifndef _SMMINTRIN_H_INCLUDED +#define _SMMINTRIN_H_INCLUDED + +#ifndef __SSE4_1__ +# error "SSE4.1 instruction set not enabled" +#else + +/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header + files. */ +#include <tmmintrin.h> + +/* SSE4.1 */ + +/* Rounding mode macros. */ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +#define _MM_FROUND_NINT \ + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_FLOOR \ + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_CEIL \ + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_TRUNC \ + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_RINT \ + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_NEARBYINT \ + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#define __always_inline__ __always_inline__, __nodebug__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +/* APPLE LOCAL begin radar 5618945 */ +#undef __STATIC_INLINE +#ifdef __GNUC_STDC_INLINE__ +#define __STATIC_INLINE __inline +#else +#define __STATIC_INLINE static __inline +#endif +/* APPLE LOCAL end radar 5618945 */ + +/* Integer blend instructions - select data from 2 sources using + constant/variable mask. 
*/ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M) +{ + return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X, + (__v8hi)__Y, + __M); +} +#else +#define _mm_blend_epi16(X, Y, M) \ + ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M))) +#endif + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M) +{ + return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X, + (__v16qi)__Y, + (__v16qi)__M); +} + +/* Single precision floating point blend instructions - select data + from 2 sources using constant/variable mask. */ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +_mm_blend_ps (__m128 __X, __m128 __Y, const int __M) +{ + return (__m128) __builtin_ia32_blendps ((__v4sf)__X, + (__v4sf)__Y, + __M); +} +#else +#define _mm_blend_ps(X, Y, M) \ + ((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M))) +#endif + +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M) +{ + return (__m128) __builtin_ia32_blendvps ((__v4sf)__X, + (__v4sf)__Y, + (__v4sf)__M); +} + +/* Double precision floating point blend instructions - select data + from 2 sources using constant/variable mask. */ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +_mm_blend_pd (__m128d __X, __m128d __Y, const int __M) +{ + return (__m128d) __builtin_ia32_blendpd ((__v2df)__X, + (__v2df)__Y, + __M); +} +#else +#define _mm_blend_pd(X, Y, M) \ + ((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M))) +#endif + +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M) +{ + return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X, + (__v2df)__Y, + (__v2df)__M); +} + +/* Dot product instructions with mask-defined summing and zeroing parts + of result. */ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +_mm_dp_ps (__m128 __X, __m128 __Y, const int __M) +{ + return (__m128) __builtin_ia32_dpps ((__v4sf)__X, + (__v4sf)__Y, + __M); +} + +__STATIC_INLINE __m128d __attribute__((__always_inline__)) +_mm_dp_pd (__m128d __X, __m128d __Y, const int __M) +{ + return (__m128d) __builtin_ia32_dppd ((__v2df)__X, + (__v2df)__Y, + __M); +} +#else +#define _mm_dp_ps(X, Y, M) \ + ((__m128) __builtin_ia32_dpps ((__v4sf)(X), (__v4sf)(Y), (M))) + +#define _mm_dp_pd(X, Y, M) \ + ((__m128d) __builtin_ia32_dppd ((__v2df)(X), (__v2df)(Y), (M))) +#endif + +/* Packed integer 64-bit comparison, zeroing or filling with ones + corresponding parts of result. */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_cmpeq_epi64 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y); +} + +/* Min/max packed integer instructions. 
*/ + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_min_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_max_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_min_epu16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_max_epu16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_min_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_max_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_min_epu32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_max_epu32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y); +} + +/* Packed integer 32-bit multiplication with truncation of upper + halves of results. */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_mullo_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y); +} + +/* Packed integer 32-bit multiplication of 2 pairs of operands + with two 64-bit results. */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_mul_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y); +} + +/* Packed integer 128-bit bitwise comparison. Return 1 if + (__V & __M) == 0. */ +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_testz_si128 (__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V); +} + +/* Packed integer 128-bit bitwise comparison. Return 1 if + (__V & ~__M) == 0. */ +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_testc_si128 (__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V); +} + +/* Packed integer 128-bit bitwise comparison. Return 1 if + (__V & __M) != 0 && (__V & ~__M) != 0. */ +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_testnzc_si128 (__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V); +} + +/* Macros for packed integer 128-bit comparison intrinsics. */ +#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) + +#define _mm_test_all_ones(V) \ + _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V))) + +#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) + +/* Insert single precision float into packed single precision array + element selected by index N. The bits [7-6] of N define S + index, the bits [5-4] define D index, and bits [3-0] define + zeroing mask for D. 
*/ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +_mm_insert_ps (__m128 __D, __m128 __S, const int __N) +{ + return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D, + (__v4sf)__S, + __N); +} +#else +#define _mm_insert_ps(D, S, N) \ + ((__m128) __builtin_ia32_insertps128 ((__v4sf)(D), (__v4sf)(S), (N))) +#endif + +/* Helper macro to create the N value for _mm_insert_ps. */ +#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M)) + +/* Extract binary representation of single precision float from packed + single precision array element of X selected by index N. */ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_extract_ps (__m128 __X, const int __N) +{ + union { int i; float f; } __tmp; + __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N); + return __tmp.i; +} +#else +#define _mm_extract_ps(X, N) \ + (__extension__ \ + ({ \ + union { int i; float f; } __tmp; \ + __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(X), (N)); \ + __tmp.i; \ + }) \ + ) +#endif + +/* Extract binary representation of single precision float into + D from packed single precision array element of S selected + by index N. */ +#define _MM_EXTRACT_FLOAT(D, S, N) \ + { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); } + +/* Extract specified single precision float element into the lower + part of __m128. */ +#define _MM_PICK_OUT_PS(X, N) \ + _mm_insert_ps (_mm_setzero_ps (), (X), \ + _MM_MK_INSERTPS_NDX ((N), 0, 0x0e)) + +/* Insert integer, S, into packed integer array element of D + selected by index N. */ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_insert_epi8 (__m128i __D, int __S, const int __N) +{ + return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D, + __S, __N); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_insert_epi32 (__m128i __D, int __S, const int __N) +{ + return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D, + __S, __N); +} + +#ifdef __x86_64__ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_insert_epi64 (__m128i __D, long long __S, const int __N) +{ + return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D, + __S, __N); +} +#endif +#else +#define _mm_insert_epi8(D, S, N) \ + ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(D), (S), (N))) + +#define _mm_insert_epi32(D, S, N) \ + ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(D), (S), (N))) + +#ifdef __x86_64__ +#define _mm_insert_epi64(D, S, N) \ + ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(D), (S), (N))) +#endif +#endif + +/* Extract integer from packed integer array element of X selected by + index N. 
 */

#ifdef __OPTIMIZE__
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_extract_epi8 (__m128i __X, const int __N)
+{
+  return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_extract_epi32 (__m128i __X, const int __N)
+{
+  return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE long long __attribute__((__always_inline__))
+_mm_extract_epi64 (__m128i __X, const int __N)
+{
+  return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
+}
+#endif
+#else
+#define _mm_extract_epi8(X, N) \
+  __builtin_ia32_vec_ext_v16qi ((__v16qi)(X), (N))
+#define _mm_extract_epi32(X, N) \
+  __builtin_ia32_vec_ext_v4si ((__v4si)(X), (N))
+
+#ifdef __x86_64__
+#define _mm_extract_epi64(X, N) \
+  ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(X), (N)))
+#endif
+#endif
+
+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively.  */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_minpos_epu16 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
+}
+
+/* Packed/scalar double precision floating point rounding.  */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_round_pd (__m128d __V, const int __M)
+{
+  return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
+}
+
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_round_sd (__m128d __D, __m128d __V, const int __M)
+{
+  return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
+                                           (__v2df)__V,
+                                           __M);
+}
+#else
+#define _mm_round_pd(V, M) \
+  ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M)))
+
+#define _mm_round_sd(D, V, M) \
+  ((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M)))
+#endif
+
+/* Packed/scalar single precision floating point rounding.  */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_round_ps (__m128 __V, const int __M)
+{
+  return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
+}
+
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_round_ss (__m128 __D, __m128 __V, const int __M)
+{
+  return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
+                                          (__v4sf)__V,
+                                          __M);
+}
+#else
+#define _mm_round_ps(V, M) \
+  ((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M)))
+
+#define _mm_round_ss(D, V, M) \
+  ((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M)))
+#endif
+
+/* Macros for ceil/floor intrinsics.  */
+#define _mm_ceil_pd(V)     _mm_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_pd(V)    _mm_round_pd ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
+
+#define _mm_ceil_ps(V)     _mm_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(V)    _mm_round_ps ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
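A short usage sketch for the rounding intrinsics above, assuming a compiler with SSE4.1 enabled (e.g. gcc -msse4.1); _mm_set_ps and _mm_storeu_ps come from the SSE headers this file already includes, and the _MM_FROUND_* masks combine a rounding direction with the exception-suppression bit exactly as the macros earlier in the header show.

#include <smmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128 v = _mm_set_ps (2.5f, -1.25f, 0.5f, 3.75f);
  __m128 f = _mm_floor_ps (v);                       /* per-element floor */
  __m128 c = _mm_round_ps (v, _MM_FROUND_TO_POS_INF
                              | _MM_FROUND_NO_EXC);  /* ceiling, no exceptions */
  float out[4];
  _mm_storeu_ps (out, f);
  printf ("floor: %g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 0 -2 2 */
  _mm_storeu_ps (out, c);
  printf ("ceil:  %g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 4 1 -1 3 */
  return 0;
}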
+/* Packed integer sign-extension.  */
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi8_epi32 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi16_epi32 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi8_epi64 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi32_epi64 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi16_epi64 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi8_epi16 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
+}
+
+/* Packed integer zero-extension.  */
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu8_epi32 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu16_epi32 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu8_epi64 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu32_epi64 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu16_epi64 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu8_epi16 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
+}
+
+/* Pack 8 double words from 2 operands into 8 words of result with
+   unsigned saturation.  */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_packus_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+   byte integers in the first 2 operands.  Starting offsets within
+   operands are determined by the 3rd mask operand.  */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
+{
+  return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
+                                              (__v16qi)__Y, __M);
+}
+#else
+#define _mm_mpsadbw_epu8(X, Y, M) \
+  ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(X), (__v16qi)(Y), (M)))
+#endif
+
+/* Load double quadword using non-temporal aligned hint.  */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_stream_load_si128 (__m128i *__X)
+{
+  return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
+}
+
+#ifdef __SSE4_2__
+
+/* These macros specify the source data format.  */
+#define SIDD_UBYTE_OPS          0x00
+#define SIDD_UWORD_OPS          0x01
+#define SIDD_SBYTE_OPS          0x02
+#define SIDD_SWORD_OPS          0x03
+
+/* These macros specify the comparison operation.  */
+#define SIDD_CMP_EQUAL_ANY      0x00
+#define SIDD_CMP_RANGES         0x04
+#define SIDD_CMP_EQUAL_EACH     0x08
+#define SIDD_CMP_EQUAL_ORDERED  0x0c
+
+/* These macros specify the polarity.
*/ +#define SIDD_POSITIVE_POLARITY 0x00 +#define SIDD_NEGATIVE_POLARITY 0x10 +#define SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define SIDD_MASKED_NEGATIVE_POLARITY 0x30 + +/* These macros specify the output selection in _mm_cmpXstri (). */ +#define SIDD_LEAST_SIGNIFICANT 0x00 +#define SIDD_MOST_SIGNIFICANT 0x40 + +/* These macros specify the output selection in _mm_cmpXstrm (). */ +#define SIDD_BIT_MASK 0x00 +#define SIDD_UNIT_MASK 0x40 + +/* Intrinsics for text/string processing. */ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M) +{ + return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpistri (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistri128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} +#else +#define _mm_cmpistrm(X, Y, M) \ + ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M))) +#define _mm_cmpistri(X, Y, M) \ + __builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M)) + +#define _mm_cmpestrm(X, LX, Y, LY, M) \ + ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M))) +#define _mm_cmpestri(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#endif + +/* Intrinsics for text/string processing and reading values of + EFlags. 
*/ + +#ifdef __OPTIMIZE__ +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpistra (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistria128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistric128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpistro (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistrio128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistris128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistriz128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +__STATIC_INLINE int __attribute__((__always_inline__)) +_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} +#else +#define _mm_cmpistra(X, Y, M) \ + __builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistrc(X, Y, M) \ + __builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistro(X, Y, M) \ + __builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistrs(X, Y, M) \ + __builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistrz(X, Y, M) \ + __builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M)) + +#define _mm_cmpestra(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestrc(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestro(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestrs(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestrz(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#endif + +/* Packed integer 64-bit comparison, zeroing or filling with ones + corresponding parts of result. 
*/
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Count the number of bits set to 1. */
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE long long __attribute__((__always_inline__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+#endif
+
+/* Accumulate a CRC32 (polynomial 0x11EDC6F41) value. */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+_mm_crc32_u8 (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+_mm_crc32_u16 (unsigned int __C, unsigned short __V)
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+_mm_crc32_u32 (unsigned int __C, unsigned int __V)
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE unsigned long long __attribute__((__always_inline__))
+_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+#endif
+
+#endif /* __SSE4_2__ */
+
+#endif /* __SSE4_1__ */
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/sse.md b/gcc-4.2.1-5666.3/gcc/config/i386/sse.md
new file mode 100644
index 000000000..40318d83a
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/sse.md
@@ -0,0 +1,6218 @@
+;; GCC machine description for SSE instructions
+;; Copyright (C) 2005, 2006
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+
+;; 16 byte integral modes handled by SSE, minus TImode, which gets
+;; special-cased for TARGET_64BIT.
+(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
+
+;; All 16-byte vector modes handled by SSE
+(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Mix-n-match
+(define_mode_macro SSEMODE12 [V16QI V8HI])
+(define_mode_macro SSEMODE24 [V8HI V4SI])
+(define_mode_macro SSEMODE14 [V16QI V4SI])
+(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
+(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
+
+;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
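
To see how these named patterns are reached, a minimal sketch (the function name and the compile option mentioned are illustrative, not taken from this tree): an intrinsic expands to a __builtin_ia32_* call, which the backend then routes through one of the named patterns in this file. For example, _mm_mul_ps in xmmintrin.h wraps __builtin_ia32_mulps and is expanded through the "mulv4sf3" pattern defined later in this file, while the scalar _mm_mul_ss form goes through the "sse_vmmulv4sf3" vec_merge pattern instead.

#include <xmmintrin.h>

/* Compiled with -msse, the multiply below becomes a single mulps.  */
__m128 scale (__m128 v, __m128 s)
{
  return _mm_mul_ps (v, s);
}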
+ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Move patterns +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; All of these patterns are enabled for SSE1 as well as SSE2. +;; This is essential for maintaining stable calling conventions. + +(define_expand "mov<mode>" + [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "") + (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (<MODE>mode, operands); + DONE; +}) + +(define_insn "*mov<mode>_internal" + [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return standard_sse_constant_opcode (insn, operands[1]); + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set (attr "mode") + (if_then_else + (ior (ior (ne (symbol_ref "optimize_size") (const_int 0)) + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)))) + (const_string "V4SF") + (const_string "TI")))]) + +(define_expand "movv4sf" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V4SFmode, operands); + DONE; +}) + +(define_insn "*movv4sf_internal" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))] + "TARGET_SSE" +{ + switch (which_alternative) + { + case 0: + return standard_sse_constant_opcode (insn, operands[1]); + case 1: + case 2: + return "movaps\t{%1, %0|%0, %1}"; + default: + abort(); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "mode" "V4SF")]) + +(define_split + [(set (match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE && reload_completed" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); + operands[2] = CONST0_RTX (V4SFmode); +}) + +(define_expand "movv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V2DFmode, operands); + DONE; +}) + +(define_insn "*movv2df_internal" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return standard_sse_constant_opcode (insn, operands[1]); + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movapd\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set (attr "mode") + (if_then_else + (ior (ior (ne (symbol_ref "optimize_size") (const_int 0)) + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)))) + (const_string "V4SF") + (const_string "V2DF")))]) + 
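
What the move patterns above decide, seen from C (a sketch; copy16 is an illustrative name): an aligned integer-vector copy is normally a movdqa, but the mode attribute falls back to V4SF -- and thus movaps -- when optimizing for size, when SSE2 is unavailable, or for stores on TARGET_SSE_TYPELESS_STORES processors, since the same 128 bits move either way.

#include <emmintrin.h>

void copy16 (__m128i *dst, const __m128i *src)
{
  __m128i t = _mm_load_si128 (src);   /* matched by *mov<mode>_internal */
  _mm_store_si128 (dst, t);           /* movdqa, or movaps under -Os */
}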
+(define_split + [(set (match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))] +{ + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); + operands[2] = CONST0_RTX (DFmode); +}) + +(define_expand "push<mode>1" + [(match_operand:SSEMODE 0 "register_operand" "")] + "TARGET_SSE" +{ + ix86_expand_push (<MODE>mode, operands[0]); + DONE; +}) + +(define_expand "movmisalign<mode>" + [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") + (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move_misalign (<MODE>mode, operands); + DONE; +}) + +(define_insn "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movups\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movupd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movupd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movdqu" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movdqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse_movntv4sf" + [(set (match_operand:V4SF 0 "memory_operand" "=m") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE" + "movntps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "sse2_movntv2df" + [(set (match_operand:V2DF 0 "memory_operand" "=m") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movntv2di" + [(set (match_operand:V2DI 0 "memory_operand" "=m") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movntsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movnti\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;; APPLE LOCAL begin 4099020, 4121692 +(define_insn "sse_loadqv4si" + [(set (match_operand:V4SI 0 "register_operand" "=rx") + (unspec:V4SI [(zero_extend:V4SI (match_operand:V2SI 1 "memory_operand" "m"))] UNSPEC_LDQ))] + "TARGET_SSE" + "movq\t{%1, %0|%0, %1}") + +;; APPLE LOCAL begin 4279065 +(define_insn "sse_storeqv4si" + [(set (match_operand:V2SI 0 "memory_operand" "=m") + (unspec:V2SI [(match_operand:V4SI 1 "register_operand" "x")] UNSPEC_STOQ))] + "TARGET_SSE" + "movq\t{%1, %0|%0, %1}") +;; APPLE LOCAL end 4279065 + +(define_insn "sse_movqv4si" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(zero_extend:V4SI (subreg:V2SI + (match_operand:V4SI 1 "register_operand" "x") 0))] UNSPEC_MOVQ))] + 
"TARGET_SSE" + "movq\t{%1, %0|%0, %1}") +;; APPLE LOCAL end 4099020, 4121692 + +(define_insn "sse3_lddqu" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] + UNSPEC_LDQQU))] + "TARGET_SSE3" + "lddqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "negv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "") + (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;") + +(define_expand "absv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "") + (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;") + +(define_expand "addv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);") + +(define_insn "*addv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" + "addps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmaddv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" + "addss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_expand "subv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (minus:V4SF (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);") + +(define_insn "*subv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "subps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "subss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_expand "mulv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);") + +(define_insn "*mulv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)" + 
"mulps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmmulv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)" + "mulss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "SF")]) + +(define_expand "divv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (div:V4SF (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);") + +(define_insn "*divv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (div:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "divps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmdivv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (div:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "divss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "SF")]) + +(define_insn "sse_rcpv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] + "TARGET_SSE" + "rcpps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmrcpv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "rcpss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_insn "sse_rsqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] + "TARGET_SSE" + "rsqrtps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmrsqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "rsqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_insn "sqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "sqrtps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmsqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "sqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX +;; isn't really correct, as those rtl operators aren't defined when +;; applied to NaNs. Hopefully the optimizers won't get too smart on us. 
+ +(define_expand "smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" +{ + if (!flag_finite_math_only) + operands[1] = force_reg (V4SFmode, operands[1]); + ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands); +}) + +(define_insn "*smaxv4sf3_finite" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE && flag_finite_math_only + && ix86_binary_operator_ok (SMAX, V4SFmode, operands)" + "maxps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "*smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "maxps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmsmaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_expand "sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" +{ + if (!flag_finite_math_only) + operands[1] = force_reg (V4SFmode, operands[1]); + ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands); +}) + +(define_insn "*sminv4sf3_finite" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE && flag_finite_math_only + && ix86_binary_operator_ok (SMIN, V4SFmode, operands)" + "minps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "*sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "minps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmsminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. 
+ +(define_insn "*ieee_sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE" + "minps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "*ieee_smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE" + "maxps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "*ieee_sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE2" + "minpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "*ieee_smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE2" + "maxpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse3_addsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (plus:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (minus:V4SF (match_dup 1) (match_dup 2)) + (const_int 5)))] + "TARGET_SSE3" + "addsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "sse3_haddv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_concat:V4SF + (vec_concat:V2SF + (plus:SF + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SF + (plus:SF + (vec_select:SF + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSE3" + "haddps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "sse3_hsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_concat:V4SF + (vec_concat:V2SF + (minus:SF + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) + (minus:SF + (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SF + (minus:SF + (vec_select:SF + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) + (minus:SF + (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSE3" + "hsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_expand "reduc_splus_v4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "")] + "TARGET_SSE" +{ 
+ if (TARGET_SSE3) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1])); + emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp)); + } + else + ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]); + DONE; +}) + +(define_expand "reduc_smax_v4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "")] + "TARGET_SSE" +{ + ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]); + DONE; +}) + +(define_expand "reduc_smin_v4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "")] + "TARGET_SSE" +{ + ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]); + DONE; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse_maskcmpv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (match_operator:V4SF 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE" + "cmp%D3ps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_vmmaskcmpv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operator:V4SF 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")]) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "cmp%D3ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) + +(define_insn "sse_comi" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE" + "comiss\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "mode" "SF")]) + +(define_insn "sse_ucomi" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE" + "ucomiss\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "mode" "SF")]) + +(define_expand "vcondv4sf" + [(set (match_operand:V4SF 0 "register_operand" "") + (if_then_else:V4SF + (match_operator 3 "" + [(match_operand:V4SF 4 "nonimmediate_operand" "") + (match_operand:V4SF 5 "nonimmediate_operand" "")]) + (match_operand:V4SF 1 "general_operand" "") + (match_operand:V4SF 2 "general_operand" "")))] + "TARGET_SSE" +{ + if (ix86_expand_fp_vcond (operands)) + DONE; + else + FAIL; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point logical operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "andv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);") + +(define_insn "*andv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + 
"TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)" + "andps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_nandv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0")) + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_expand "iorv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);") + +(define_insn "*iorv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)" + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_expand "xorv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "")))] + "TARGET_SSE" + "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);") + +(define_insn "*xorv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +;; Also define scalar versions. These are used for abs, neg, and +;; conditional move. Using subregs into vector modes causes register +;; allocation lossage. These patterns do not allow memory operands +;; because the native instructions read the full 128-bits. 
+ +(define_insn "*andsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (and:SF (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "andps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "*nandsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (and:SF (not:SF (match_operand:SF 1 "register_operand" "0")) + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "*iorsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (ior:SF (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "*xorsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (xor:SF (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse_cvtpi2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4SF 1 "register_operand" "0") + (const_int 3)))] + "TARGET_SSE" + "cvtpi2ps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_cvtps2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "cvtps2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") + (set_attr "mode" "DI")]) + +(define_insn "sse_cvttps2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "cvttps2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") + (set_attr "mode" "SF")]) + +(define_insn "sse_cvtsi2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m"))) + (match_operand:V4SF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE" + "cvtsi2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "mode" "SF")]) + +(define_insn "sse_cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (match_operand:V4SF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "mode" "SF")]) + +(define_insn "sse_cvtss2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI + [(vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE" + "cvtss2si\t{%1, %0|%0, %1}" + 
[(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "SI")]) + +(define_insn "sse_cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI + [(vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE && TARGET_64BIT" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "DI")]) + +(define_insn "sse_cvttss2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))))] + "TARGET_SSE" + "cvttss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "SI")]) + +(define_insn "sse_cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "DI")]) + +(define_insn "sse2_cvtdq2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtdq2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_cvtps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2" + "cvtps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_cvttps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvttps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse_movhlps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0") + (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x")) + (parallel [(const_int 6) + (const_int 7) + (const_int 2) + (const_int 3)])))] + "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + movhlps\t{%2, %0|%0, %2} + movlps\t{%H2, %0|%0, %H2} + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF,V2SF,V2SF")]) + +; APPLE LOCAL begin radar 4099352 +(define_insn "sse_movlhps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,o") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0,0") + /* APPLE LOCAL mainline */ + (match_operand:V4SF 2 "nonimmediate_or_0_operand" " C,x,m,x")) + (parallel [(const_int 0) + (const_int 1) + (const_int 4) + (const_int 5)])))] + "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" + "@ + xorps\t%0, %0 + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2} + movlps\t{%2, %H0|%H0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) +; APPLE LOCAL end radar 4099352 + +(define_insn "sse_unpckhps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + 
(vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_SSE" + "unpckhps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_unpcklps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_SSE" + "unpcklps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +;; These are modeled with the same vec_concat as the others so that we +;; capture users of shufps that can use the new instructions +(define_insn "sse3_movshdup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (match_dup 1)) + (parallel [(const_int 1) + (const_int 1) + (const_int 7) + (const_int 7)])))] + "TARGET_SSE3" + "movshdup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "sse3_movsldup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (match_dup 1)) + (parallel [(const_int 0) + (const_int 0) + (const_int 6) + (const_int 6)])))] + "TARGET_SSE3" + "movsldup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_expand "sse_shufps" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "nonimmediate_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "TARGET_SSE" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4))); + DONE; +}) + +(define_insn "sse_shufps_1" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (parallel [(match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_4_to_7_operand" "") + (match_operand 6 "const_4_to_7_operand" "")])))] + "TARGET_SSE" +{ + int mask = 0; + mask |= INTVAL (operands[3]) << 0; + mask |= INTVAL (operands[4]) << 2; + mask |= (INTVAL (operands[5]) - 4) << 4; + mask |= (INTVAL (operands[6]) - 4) << 6; + operands[3] = GEN_INT (mask); + + return "shufps\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "sse_storehps" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_SSE" + "@ + movhps\t{%1, %0|%0, %1} + movhlps\t{%1, %0|%0, %1} + movlps\t{%H1, %0|%0, %H1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse_loadhps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") + (vec_concat:V4SF + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))] + "TARGET_SSE" + 
"@ + movhps\t{%2, %0|%0, %2} + movlhps\t{%2, %0|%0, %2} + movlps\t{%2, %H0|%H0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse_storelps" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "@ + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse_loadlps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (vec_concat:V4SF + (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0") + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_SSE" + "@ + shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} + movlps\t{%2, %0|%0, %2} + movlps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "mode" "V4SF,V2SF,V2SF")]) + +(define_insn "sse_movss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 2 "register_operand" "x") + (match_operand:V4SF 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "movss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +(define_insn "*vec_dupv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_duplicate:V4SF + (match_operand:SF 1 "register_operand" "0")))] + "TARGET_SSE" + "shufps\t{$0, %0, %0|%0, %0, 0}" + [(set_attr "type" "sselog1") + (set_attr "mode" "V4SF")]) + +;; ??? In theory we can match memory for the MMX alternative, but allowing +;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE +;; alternatives pretty much forces the MMX alternative to be chosen. +(define_insn "*sse_concatv2sf" + [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y") + (vec_concat:V2SF + (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m") + (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))] + "TARGET_SSE" + "@ + unpcklps\t{%2, %0|%0, %2} + movss\t{%1, %0|%0, %1} + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") + (set_attr "mode" "V4SF,SF,DI,DI")]) + +(define_insn "*sse_concatv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" " 0,0") + (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))] + "TARGET_SSE" + "@ + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF,V2SF")]) + +(define_expand "vec_initv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_insn "*vec_setv4sf_0" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m") + (vec_merge:V4SF + (vec_duplicate:V4SF + (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) + /* APPLE LOCAL mainline */ + (match_operand:V4SF 1 "nonimmediate_or_0_operand" " 0,C,C ,0") + (const_int 1)))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE" + "@ + movss\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2} + movd\t{%2, %0|%0, %2} + #" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; A subset is vec_setv4sf. 
+(define_insn "*vec_setv4sf_sse4_1" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (match_operand:SF 2 "nonimmediate_operand" "xm")) + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))] + "TARGET_SSE4_1" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); + return "insertps\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse4_1_insertps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_INSERTPS))] + "TARGET_SSE4_1" + "insertps\t{%3, %2, %0|%0, %2, %3}"; + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_split + [(set (match_operand:V4SF 0 "memory_operand" "") + (vec_merge:V4SF + (vec_duplicate:V4SF + (match_operand:SF 1 "nonmemory_operand" "")) + (match_dup 0) + (const_int 1)))] + "TARGET_SSE && reload_completed" + [(const_int 0)] +{ + emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]); + DONE; +}) + +(define_expand "vec_setv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_insn_and_split "*vec_extractv4sf_0" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr") + (vec_select:SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m") + (parallel [(const_int 0)])))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (SFmode, REGNO (op1)); + else + op1 = gen_lowpart (SFmode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "*sse4_1_extractps" + [(set (match_operand:SF 0 "register_operand" "=rm") + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] + "TARGET_SSE4_1" + "extractps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_expand "vec_extractv4sf" + [(match_operand:SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "negv2df2" + [(set (match_operand:V2DF 0 "register_operand" "") + (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;") + +(define_expand "absv2df2" + [(set (match_operand:V2DF 0 "register_operand" "") + (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;") + +(define_expand "addv2df3" + [(set 
(match_operand:V2DF 0 "register_operand" "") + (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);") + +(define_insn "*addv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)" + "addpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmaddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V4SFmode, operands)" + "addsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_expand "subv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);") + +(define_insn "*subv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "subpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "subsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_expand "mulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);") + +(define_insn "*mulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)" + "mulpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmmulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)" + "mulsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "DF")]) + +(define_expand "divv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (div:V2DF (match_operand:V2DF 1 "register_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);") + +(define_insn "*divv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (div:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "divpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + 
(set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmdivv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (div:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "divsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "DF")]) + +(define_insn "sqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "sqrtpd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmsqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")) + (match_operand:V2DF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE2" + "sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) + +;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX +;; isn't really correct, as those rtl operators aren't defined when +;; applied to NaNs. Hopefully the optimizers won't get too smart on us. + +(define_expand "smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" +{ + if (!flag_finite_math_only) + operands[1] = force_reg (V2DFmode, operands[1]); + ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands); +}) + +(define_insn "*smaxv2df3_finite" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && flag_finite_math_only + && ix86_binary_operator_ok (SMAX, V2DFmode, operands)" + "maxpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "*smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "maxpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmsmaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_expand "sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" +{ + if (!flag_finite_math_only) + operands[1] = force_reg (V2DFmode, operands[1]); + ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands); +}) + +(define_insn "*sminv2df3_finite" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && flag_finite_math_only + && ix86_binary_operator_ok (SMIN, V2DFmode, operands)" + "minpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "*sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" 
"xm")))] + "TARGET_SSE2" + "minpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmsminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "sse3_addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (plus:V2DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (minus:V2DF (match_dup 1) (match_dup 2)) + (const_int 1)))] + "TARGET_SSE3" + "addsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse3_haddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (plus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (plus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "haddpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse3_hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (minus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (minus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "hsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_expand "reduc_splus_v2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "register_operand" "")] + "TARGET_SSE3" +{ + emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); + DONE; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_maskcmpv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (match_operator:V2DF 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE2" + "cmp%D3pd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmmaskcmpv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (match_operator:V2DF 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")]) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "cmp%D3sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "sse2_comi" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "comisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "mode" "DF")]) + +(define_insn 
"sse2_ucomi" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "ucomisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "mode" "DF")]) + +(define_expand "vcondv2df" + [(set (match_operand:V2DF 0 "register_operand" "") + (if_then_else:V2DF + (match_operator 3 "" + [(match_operand:V2DF 4 "nonimmediate_operand" "") + (match_operand:V2DF 5 "nonimmediate_operand" "")]) + (match_operand:V2DF 1 "general_operand" "") + (match_operand:V2DF 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (ix86_expand_fp_vcond (operands)) + DONE; + else + FAIL; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point logical operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "andv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);") + +(define_insn "*andv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_nandv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0")) + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_expand "iorv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);") + +(define_insn "*iorv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)" + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_expand "xorv2df3" + [(set (match_operand:V2DF 0 "register_operand" "") + (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);") + +(define_insn "*xorv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)" + "xorpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +;; Also define scalar versions. These are used for abs, neg, and +;; conditional move. Using subregs into vector modes causes register +;; allocation lossage. These patterns do not allow memory operands +;; because the native instructions read the full 128-bits. 
+ +(define_insn "*anddf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (and:DF (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "*nanddf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (and:DF (not:DF (match_operand:DF 1 "register_operand" "0")) + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "*iordf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (ior:DF (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "*xordf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (xor:DF (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "xorpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_cvtpi2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] + "TARGET_SSE2" + "cvtpi2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx,*") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_cvtpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2" + "cvtpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") + (set_attr "mode" "DI")]) + +(define_insn "sse2_cvttpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvttpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") + (set_attr "mode" "TI")]) + +(define_insn "sse2_cvtsi2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (vec_duplicate:V2DF + (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m"))) + (match_operand:V2DF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE2" + "cvtsi2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct")]) + +(define_insn "sse2_cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (vec_duplicate:V2DF + (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m"))) + (match_operand:V2DF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct")]) + +(define_insn "sse2_cvtsd2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI + [(vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2" + "cvtsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "SI")]) + +(define_insn "sse2_cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + 
(unspec:DI + [(vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "DI")]) + +(define_insn "sse2_cvttsd2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SI") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "sse2_cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "sse2_cvtdq2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE2" + "cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")] + UNSPEC_FIX_NOTRUNC) + (match_dup 2)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V2SImode);") + +(define_insn "*sse2_cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC) + (match_operand:V2SI 2 "const0_operand" "")))] + "TARGET_SSE2" + "cvtpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_expand "sse2_cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "")) + (match_dup 2)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V2SImode);") + +(define_insn "*sse2_cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) + (match_operand:V2SI 2 "const0_operand" "")))] + "TARGET_SSE2" + "cvttpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float_truncate:V2SF + (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))) + (match_operand:V4SF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE2" + "cvtsd2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "mode" "SF")]) + +(define_insn "sse2_cvtss2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 1)]))) + (match_operand:V2DF 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE2" + "cvtss2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_expand "sse2_cvtpd2ps" + [(set (match_operand:V4SF 0 "register_operand" "") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "nonimmediate_operand" 
"xm")) + (match_dup 2)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V2SFmode);") + +(define_insn "*sse2_cvtpd2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "nonimmediate_operand" "xm")) + (match_operand:V2SF 2 "const0_operand" "")))] + "TARGET_SSE2" + "cvtpd2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_insn "sse2_cvtps2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE2" + "cvtps2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_unpckhpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x") + (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + unpckhpd\t{%2, %0|%0, %2} + movlpd\t{%H1, %0|%0, %H1} + movhpd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "mode" "V2DF,V1DF,V1DF")]) + +(define_insn "*sse3_movddup" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") + (match_dup 1)) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movddup\t{%1, %0|%0, %1} + #" + [(set_attr "type" "sselog1,ssemov") + (set_attr "mode" "V2DF")]) + +(define_split + [(set (match_operand:V2DF 0 "memory_operand" "") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "register_operand" "") + (match_dup 1)) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE3 && reload_completed" + [(const_int 0)] +{ + rtx low = gen_rtx_REG (DFmode, REGNO (operands[1])); + emit_move_insn (adjust_address (operands[0], DFmode, 0), low); + emit_move_insn (adjust_address (operands[0], DFmode, 8), low); + DONE; +}) + +(define_insn "sse2_unpcklpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") + (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + unpcklpd\t{%2, %0|%0, %2} + movhpd\t{%2, %0|%0, %2} + movlpd\t{%2, %H0|%H0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "mode" "V2DF,V1DF,V1DF")]) + +(define_expand "sse2_shufpd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "register_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[3]); + emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2], + GEN_INT (mask & 1), + GEN_INT (mask & 2 ? 
3 : 2))); + DONE; +}) + +(define_insn "sse2_shufpd_1" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (parallel [(match_operand 3 "const_0_to_1_operand" "") + (match_operand 4 "const_2_to_3_operand" "")])))] + "TARGET_SSE2" +{ + int mask; + mask = INTVAL (operands[3]); + mask |= (INTVAL (operands[4]) - 2) << 1; + operands[3] = GEN_INT (mask); + + return "shufpd\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_storehpd" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movhpd\t{%1, %0|%0, %1} + unpckhpd\t%0, %0 + #" + [(set_attr "type" "ssemov,sselog1,ssemov") + (set_attr "mode" "V1DF,V2DF,DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (vec_select:DF + (match_operand:V2DF 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = adjust_address (operands[1], DFmode, 8); +}) + +(define_insn "sse2_storelpd" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movlpd\t{%1, %0|%0, %1} + # + #" + [(set_attr "type" "ssemov") + (set_attr "mode" "V1DF,DF,DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (DFmode, REGNO (op1)); + else + op1 = gen_lowpart (DFmode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_insn "sse2_loadhpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") + (vec_concat:V2DF + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0") + (parallel [(const_int 0)])) + (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))] + "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + movhpd\t{%2, %0|%0, %2} + unpcklpd\t{%2, %0|%0, %2} + shufpd\t{$1, %1, %0|%0, %1, 1} + #" + [(set_attr "type" "ssemov,sselog,sselog,other") + (set_attr "mode" "V1DF,V2DF,V2DF,DF")]) + +(define_split + [(set (match_operand:V2DF 0 "memory_operand" "") + (vec_concat:V2DF + (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) + (match_operand:DF 1 "register_operand" "")))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = adjust_address (operands[0], DFmode, 8); +}) + +(define_insn "sse2_loadlpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") + (vec_concat:V2DF + (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr") + (vec_select:DF + /* APPLE LOCAL mainline */ + (match_operand:V2DF 1 "nonimmediate_or_0_operand" " C,0,0,x,o,0") + (parallel [(const_int 1)]))))] + "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + movsd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2} + movsd\t{%2, %0|%0, %2} + shufpd\t{$2, %2, %0|%0, %2, 2} + movhpd\t{%H1, %0|%0, %H1} + #" + [(set_attr "type" 
"ssemov,ssemov,ssemov,sselog,ssemov,other") + (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")]) + +(define_split + [(set (match_operand:V2DF 0 "memory_operand" "") + (vec_concat:V2DF + (match_operand:DF 1 "register_operand" "") + (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = adjust_address (operands[0], DFmode, 8); +}) + +;; Not sure these two are ever used, but it doesn't hurt to have +;; them. -aoliva +(define_insn "*vec_extractv2df_1_sse" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") + (parallel [(const_int 1)])))] + "!TARGET_SSE2 && TARGET_SSE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movhps\t{%1, %0|%0, %1} + movhlps\t{%1, %0|%0, %1} + movlps\t{%H1, %0|%0, %H1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "*vec_extractv2df_0_sse" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") + (parallel [(const_int 0)])))] + "!TARGET_SSE2 && TARGET_SSE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "sse2_movsd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o") + (vec_merge:V2DF + (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0") + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x") + (const_int 1)))] + "TARGET_SSE2" + "@ + movsd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2} + shufpd\t{$2, %2, %0|%0, %2, 2} + movhps\t{%H1, %0|%0, %H1} + movhps\t{%1, %H0|%H0, %1}" + [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")]) + +(define_insn "*vec_dupv2df_sse3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF + (match_operand:DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE3" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "mode" "DF")]) + +(define_insn "*vec_dupv2df" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "0")))] + "TARGET_SSE2" + "unpcklpd\t%0, %0" + [(set_attr "type" "sselog1") + (set_attr "mode" "V4SF")]) + +(define_insn "*vec_concatv2df_sse3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (match_operand:DF 1 "nonimmediate_operand" "xm") + (match_dup 1)))] + "TARGET_SSE3" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "mode" "DF")]) + +(define_insn "*vec_concatv2df" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x") + (vec_concat:V2DF + (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0") + /* APPLE LOCAL mainline */ + (match_operand:DF 2 "nonimmediate_or_0_operand" " Yt,m,C,x,m")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE" + "@ + unpcklpd\t{%2, %0|%0, %2} + movhpd\t{%2, %0|%0, %2} + movsd\t{%1, %0|%0, %1} + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov") + (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")]) + +(define_expand "vec_setv2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "") + (match_operand 2 
"const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv2df" + [(match_operand:DF 0 "register_operand" "") + (match_operand:V2DF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "neg<mode>2" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (minus:SSEMODEI + (match_dup 2) + (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") + +(define_expand "add<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") + +(define_insn "*add<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (plus:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "padd<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse2_ssadd<mode>3" + [(set (match_operand:SSEMODE12 0 "register_operand" "=x") + (ss_plus:SSEMODE12 + (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)" + "padds<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse2_usadd<mode>3" + [(set (match_operand:SSEMODE12 0 "register_operand" "=x") + (us_plus:SSEMODE12 + (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)" + "paddus<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "sub<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") + +(define_insn "*sub<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (minus:SSEMODEI + (match_operand:SSEMODEI 1 "register_operand" "0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psub<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse2_sssub<mode>3" + [(set (match_operand:SSEMODE12 0 "register_operand" "=x") + (ss_minus:SSEMODE12 + (match_operand:SSEMODE12 1 "register_operand" "0") + (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubs<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr 
"mode" "TI")]) + +(define_insn "sse2_ussub<mode>3" + [(set (match_operand:SSEMODE12 0 "register_operand" "=x") + (us_minus:SSEMODE12 + (match_operand:SSEMODE12 1 "register_operand" "0") + (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubus<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "mulv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "") + (mult:V16QI (match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "")))] + "TARGET_SSE2" +{ + rtx t[12], op0; + int i; + + for (i = 0; i < 12; ++i) + t[i] = gen_reg_rtx (V16QImode); + + /* Unpack data such that we've got a source byte in each low byte of + each word. We don't care what goes into the high byte of each word. + Rather than trying to get zero in there, most convenient is to let + it be a copy of the low byte. */ + emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1])); + emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2])); + emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1])); + emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2])); + + /* Multiply words. The end-of-line annotations here give a picture of what + the output of that instruction looks like. Dot means don't care; the + letters are the bytes of the result with A being the most significant. */ + emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */ + gen_lowpart (V8HImode, t[0]), + gen_lowpart (V8HImode, t[1]))); + emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */ + gen_lowpart (V8HImode, t[2]), + gen_lowpart (V8HImode, t[3]))); + + /* Extract the relevant bytes and merge them back together. */ + emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */ + emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */ + emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */ + emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */ + emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */ + emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */ + + op0 = operands[0]; + emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */ + DONE; +}) + +(define_expand "mulv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "") + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") + +(define_insn "*mulv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_expand "smulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI + (sign_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "")) + (sign_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" ""))) + (const_int 16))))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") + +(define_insn "*smulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI + 
(sign_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 16))))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_expand "umulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI + (zero_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "")) + (zero_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" ""))) + (const_int 16))))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") + +(define_insn "*umulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI + (zero_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (zero_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 16))))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse2_umulv2siv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (mult:V2DI + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) (const_int 2)]))) + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_mulv2siv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)" + "pmuldq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 +(define_insn "sse2_pmaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))))] + "TARGET_SSE2" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "mulv4si3" + [(set (match_operand:V4SI 0 "register_operand" "") + (mult:V4SI (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")))] + "TARGET_SSE2" +{ + rtx t1, t2, t3, t4, t5, t6, thirtytwo; + rtx op0, op1, op2; + + op0 = 
operands[0]; + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V4SImode); + t4 = gen_reg_rtx (V4SImode); + t5 = gen_reg_rtx (V4SImode); + t6 = gen_reg_rtx (V4SImode); + thirtytwo = GEN_INT (32); + + /* Multiply elements 2 and 0. */ + emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2)); + + /* Shift both input vectors down one element, so that elements 3 and 1 + are now in the slots for elements 2 and 0. For K8, at least, this is + faster than using a shuffle. */ + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), + gen_lowpart (TImode, op1), thirtytwo)); + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), + gen_lowpart (TImode, op2), thirtytwo)); + + /* Multiply elements 3 and 1. */ + emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3)); + + /* Move the results in element 2 down to element 1; we don't care what + goes in elements 2 and 3. */ + emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx, + const0_rtx, const0_rtx)); + emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx, + const0_rtx, const0_rtx)); + + /* Merge the parts back together. */ + emit_insn (gen_sse2_punpckldq (op0, t5, t6)); + DONE; +}) + +(define_expand "mulv2di3" + [(set (match_operand:V2DI 0 "register_operand" "") + (mult:V2DI (match_operand:V2DI 1 "register_operand" "") + (match_operand:V2DI 2 "register_operand" "")))] + "TARGET_SSE2" +{ + rtx t1, t2, t3, t4, t5, t6, thirtytwo; + rtx op0, op1, op2; + + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V2DImode); + t2 = gen_reg_rtx (V2DImode); + t3 = gen_reg_rtx (V2DImode); + t4 = gen_reg_rtx (V2DImode); + t5 = gen_reg_rtx (V2DImode); + t6 = gen_reg_rtx (V2DImode); + thirtytwo = GEN_INT (32); + + /* Multiply low parts. */ + emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), + gen_lowpart (V4SImode, op2))); + + /* Shift input vectors right 32 bits so we can multiply high parts. */ + emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); + emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); + + /* Multiply high parts by low parts. */ + emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), + gen_lowpart (V4SImode, t3))); + emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), + gen_lowpart (V4SImode, t2))); + + /* Shift them back. */ + emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); + emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); + + /* Add the three parts together. 
*/ + emit_insn (gen_addv2di3 (t6, t1, t4)); + emit_insn (gen_addv2di3 (op0, t6, t5)); + DONE; +}) + +(define_expand "sdot_prodv8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "") + (match_operand:V4SI 3 "register_operand" "")] + "TARGET_SSE2" +{ + rtx t = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2])); + emit_insn (gen_addv4si3 (operands[0], operands[3], t)); + DONE; +}) + +(define_expand "udot_prodv4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "") + (match_operand:V2DI 3 "register_operand" "")] + "TARGET_SSE2" +{ + rtx t1, t2, t3, t4; + + t1 = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2])); + emit_insn (gen_addv2di3 (t1, t1, operands[3])); + + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), + gen_lowpart (TImode, operands[1]), + GEN_INT (32))); + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), + gen_lowpart (TImode, operands[2]), + GEN_INT (32))); + + t4 = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); + + emit_insn (gen_addv2di3 (operands[0], t1, t4)); + DONE; +}) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "*sse4_1_mulv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)" + "pmulld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_expand "vec_widen_smult_hi_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "") + (match_operand:V8HI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2, dest; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V8HImode); + t2 = gen_reg_rtx (V8HImode); + dest = gen_lowpart (V8HImode, operands[0]); + + emit_insn (gen_mulv8hi3 (t1, op1, op2)); + emit_insn (gen_smulv8hi3_highpart (t2, op1, op2)); + emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2)); + DONE; +}) + +(define_expand "vec_widen_smult_lo_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "") + (match_operand:V8HI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2, dest; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V8HImode); + t2 = gen_reg_rtx (V8HImode); + dest = gen_lowpart (V8HImode, operands[0]); + + emit_insn (gen_mulv8hi3 (t1, op1, op2)); + emit_insn (gen_smulv8hi3_highpart (t2, op1, op2)); + emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2)); + DONE; +}) + +(define_expand "vec_widen_umult_hi_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "") + (match_operand:V8HI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2, dest; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V8HImode); + t2 = gen_reg_rtx (V8HImode); + dest = gen_lowpart (V8HImode, operands[0]); + + emit_insn (gen_mulv8hi3 (t1, op1, op2)); + emit_insn (gen_umulv8hi3_highpart (t2, op1, op2)); + emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2)); + DONE; +}) + +(define_expand "vec_widen_umult_lo_v8hi" + [(match_operand:V4SI 0 
"register_operand" "") + (match_operand:V8HI 1 "register_operand" "") + (match_operand:V8HI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2, dest; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V8HImode); + t2 = gen_reg_rtx (V8HImode); + dest = gen_lowpart (V8HImode, operands[0]); + + emit_insn (gen_mulv8hi3 (t1, op1, op2)); + emit_insn (gen_umulv8hi3_highpart (t2, op1, op2)); + emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2)); + DONE; +}) + +(define_expand "vec_widen_smult_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_highv4si (t1, op1, op1)); + emit_insn (gen_vec_interleave_highv4si (t2, op2, op2)); + emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2)); + DONE; +}) + +(define_expand "vec_widen_smult_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1)); + emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2)); + emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2)); + DONE; +}) + +(define_expand "vec_widen_umult_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_highv4si (t1, op1, op1)); + emit_insn (gen_vec_interleave_highv4si (t2, op2, op2)); + emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2)); + DONE; +}) + +(define_expand "vec_widen_umult_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_SSE2" +{ + rtx op1, op2, t1, t2; + + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1)); + emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2)); + emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2)); + DONE; +}) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_insn "ashr<mode>3" + [(set (match_operand:SSEMODE24 0 "register_operand" "=x") + (ashiftrt:SSEMODE24 + (match_operand:SSEMODE24 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xn")))] + "TARGET_SSE2" + "psra<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshr<mode>3" + [(set (match_operand:SSEMODE248 0 "register_operand" "=x") + (lshiftrt:SSEMODE248 + (match_operand:SSEMODE248 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xn")))] + "TARGET_SSE2" + "psrl<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashl<mode>3" + [(set (match_operand:SSEMODE248 0 "register_operand" "=x") + (ashift:SSEMODE248 + (match_operand:SSEMODE248 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xn")))] + "TARGET_SSE2" + "psll<ssevecsize>\t{%2, %0|%0, 
%2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +/* APPLE LOCAL begin 6440204 */ +/* Moved to i386.md. */ +/* APPLE LOCAL end 6440204 */ + +(define_expand "vec_shl_<mode>" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (!const_0_to_255_mul_8_operand (operands[2], SImode)) + FAIL; + operands[0] = gen_lowpart (TImode, operands[0]); + operands[1] = gen_lowpart (TImode, operands[1]); +}) + +;; APPLE LOCAL begin mainline 5951842 +;; moved sse2_lshrti3 to i386.md +;; APPLE LOCAL end mainline 5951842 + +(define_expand "vec_shr_<mode>" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (!const_0_to_255_mul_8_operand (operands[2], SImode)) + FAIL; + operands[0] = gen_lowpart (TImode, operands[0]); + operands[1] = gen_lowpart (TImode, operands[1]); +}) + +(define_expand "umaxv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "") + (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") + (match_operand:V16QI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);") + +(define_insn "*umaxv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "smaxv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "") + (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);") + +(define_insn "*smaxv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "umaxv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (set (match_dup 3) + (plus:V8HI (match_dup 0) (match_dup 2)))] + "TARGET_SSE2" +{ + operands[3] = operands[0]; + if (rtx_equal_p (operands[0], operands[2])) + operands[0] = gen_reg_rtx (V8HImode); +}) + +(define_expand "smax<mode>3" + [(set (match_operand:SSEMODE14 0 "register_operand" "") + (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") + (match_operand:SSEMODE14 2 "register_operand" "")))] + "TARGET_SSE2" +{ + rtx xops[6]; + bool ok; + + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; +}) + +(define_expand "umaxv4si3" + [(set (match_operand:V4SI 0 "register_operand" "") + (umax:V4SI (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")))] + "TARGET_SSE2" +{ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + if (TARGET_SSE4_1) + 
ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands); + else + { + rtx xops[6]; + bool ok; + + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; + } + /* APPLE LOCAL end 5612787 mainline sse4 */ +}) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "sse4_1_smax<mode>3" + [(set (match_operand:SSEMODE14 0 "register_operand" "=x") + (smax:SSEMODE14 + (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)" + "pmaxs<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_umax<mode>3" + [(set (match_operand:SSEMODE24 0 "register_operand" "=x") + (umax:SSEMODE24 + (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)" + "pmaxu<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_expand "uminv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "") + (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") + (match_operand:V16QI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);") + +(define_insn "*uminv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "sminv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "") + (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);") + +(define_insn "*sminv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_expand "smin<mode>3" + [(set (match_operand:SSEMODE14 0 "register_operand" "") + (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") + (match_operand:SSEMODE14 2 "register_operand" "")))] + "TARGET_SSE2" +{ + rtx xops[6]; + bool ok; + + xops[0] = operands[0]; + xops[1] = operands[2]; + xops[2] = operands[1]; + xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; +}) + +(define_expand "umin<mode>3" + [(set (match_operand:SSEMODE24 0 "register_operand" "") + (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "") + (match_operand:SSEMODE24 2 "register_operand" "")))] + "TARGET_SSE2" +{ + /* APPLE LOCAL begin 5612787 mainline sse4 */ + if (TARGET_SSE4_1) + ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands); + else 
+ { + rtx xops[6]; + bool ok; + + xops[0] = operands[0]; + xops[1] = operands[2]; + xops[2] = operands[1]; + xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; + } + /* APPLE LOCAL end 5612787 mainline sse4 */ +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_eq<mode>3" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (eq:SSEMODE124 + (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" + "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "sse4_1_eqv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (eq:V2DI + (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" + "pcmpeqq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_insn "sse2_gt<mode>3" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (gt:SSEMODE124 + (match_operand:SSEMODE124 1 "register_operand" "0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "sse4_2_gtv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (gt:V2DI + (match_operand:V2DI 1 "nonimmediate_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_2" + "pcmpgtq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_expand "vcond<mode>" + [(set (match_operand:SSEMODE124 0 "register_operand" "") + (if_then_else:SSEMODE124 + (match_operator 3 "" + [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") + (match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODE124 1 "general_operand" "") + (match_operand:SSEMODE124 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (ix86_expand_int_vcond (operands)) + DONE; + else + FAIL; +}) + +(define_expand "vcondu<mode>" + [(set (match_operand:SSEMODE124 0 "register_operand" "") + (if_then_else:SSEMODE124 + (match_operator 3 "" + [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") + (match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODE124 1 "general_operand" "") + (match_operand:SSEMODE124 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (ix86_expand_int_vcond (operands)) + DONE; + else + FAIL; +}) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral logical operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "one_cmpl<mode>2" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_dup 2)))] + "TARGET_SSE2" +{ + int i, n = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n); + + for (i = 0; i < n; ++i) + RTVEC_ELT (v, 
i) = constm1_rtx; + + operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); +}) + +(define_expand "and<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);") + +(define_insn "*and<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (and:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_nand<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (and:SSEMODEI + (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0")) + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "ior<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);") + +(define_insn "*ior<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (ior:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "xor<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);") + +(define_insn "*xor<mode>3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (xor:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "sse4_1_smin<mode>3" + [(set (match_operand:SSEMODE14 0 "register_operand" "=x") + (smin:SSEMODE14 + (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)" + "pmins<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_umin<mode>3" + [(set (match_operand:SSEMODE24 0 "register_operand" "=x") + (umin:SSEMODE24 + (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)" + "pminu<ssevecsize>\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_expand "vec_interleave_highv16qi" + [(set (match_operand:V16QI 0 
"register_operand" "=x") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_lowv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_highv8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_lowv8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_highv4si" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_lowv4si" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_highv2di" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_interleave_lowv2di" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "0") + 
(match_operand:V2DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2])); + DONE; +}) +;; APPLE LOCAL end 5612787 mainline sse4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_packsswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (ss_truncate:V8QI + (match_operand:V8HI 1 "register_operand" "0")) + (ss_truncate:V8QI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE2" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packssdw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (ss_truncate:V4HI + (match_operand:V4SI 1 "register_operand" "0")) + (ss_truncate:V4HI + (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE2" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packuswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (us_truncate:V8QI + (match_operand:V8HI 1 "register_operand" "0")) + (us_truncate:V8QI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE2" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "TARGET_SSE2" + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] + "TARGET_SSE2" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhwd" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_SSE2" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklwd" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 
11)])))] + "TARGET_SSE2" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhdq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_SSE2" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckldq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_SSE2" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_SSE2" + "punpckhqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE2" + "punpcklqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "*sse4_1_pinsrb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_merge:V16QI + (vec_duplicate:V16QI + (match_operand:QI 2 "nonimmediate_operand" "rm")) + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))] + "TARGET_SSE4_1" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_expand "sse2_pinsrw" + [(set (match_operand:V8HI 0 "register_operand" "") + (vec_merge:V8HI + (vec_duplicate:V8HI + (match_operand:SI 2 "nonimmediate_operand" "")) + (match_operand:V8HI 1 "register_operand" "") + (match_operand:SI 3 "const_0_to_7_operand" "")))] + "TARGET_SSE2" +{ + operands[2] = gen_lowpart (HImode, operands[2]); + operands[3] = GEN_INT ((1 << INTVAL (operands[3]))); +}) + +(define_insn "*sse2_pinsrw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI + (vec_duplicate:V8HI + (match_operand:HI 2 "nonimmediate_operand" "rm")) + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))] + "TARGET_SSE2" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;; It must come before sse2_loadld since it is preferred. 
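+;; (recog matches define_insns in file order, so the earlier pattern
+;; wins whenever both apply.)  The vec_merge mask below is a single
+;; power of two; the output code recovers the element index with
+;; exact_log2, so e.g. a mask of (const_int 4) selects element 2 and
+;; prints as "pinsrd $2, ...".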
+(define_insn "*sse4_1_pinsrd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_duplicate:V4SI + (match_operand:SI 2 "nonimmediate_operand" "rm")) + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))] + "TARGET_SSE4_1" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return "pinsrd\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_pinsrq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 2 "nonimmediate_operand" "rm")) + (match_operand:V2DI 1 "register_operand" "0") + (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))] + "TARGET_SSE4_1" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return "pinsrq\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_pextrb" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] + "TARGET_SSE4_1" + "pextrb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_pextrb_memory" + [(set (match_operand:QI 0 "memory_operand" "=m") + (vec_select:QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] + "TARGET_SSE4_1" + "pextrb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_insn "sse2_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] + "TARGET_SSE2" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "*sse4_1_pextrw_memory" + [(set (match_operand:HI 0 "memory_operand" "=m") + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] + "TARGET_SSE4_1" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_pextrd" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] + "TARGET_SSE4_1" + "pextrd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +;; It must come before *vec_extractv2di_1_sse since it is preferred. 
+(define_insn "*sse4_1_pextrq" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] + "TARGET_SSE4_1 && TARGET_64BIT" + "pextrq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_expand "sse2_pshufd" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "sse2_pshufd_1" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "")])))] + "TARGET_SSE2" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "pshufd\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_expand "sse2_pshuflw" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "sse2_pshuflw_1" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "") + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)])))] + "TARGET_SSE2" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "pshuflw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "sse2_pshufhw" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1], + GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4))); + DONE; +}) + +(define_insn "sse2_pshufhw_1" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (match_operand 2 "const_4_to_7_operand" "") + (match_operand 3 "const_4_to_7_operand" "") + (match_operand 4 "const_4_to_7_operand" "") + (match_operand 5 "const_4_to_7_operand" 
"")])))] + "TARGET_SSE2" +{ + int mask = 0; + mask |= (INTVAL (operands[2]) - 4) << 0; + mask |= (INTVAL (operands[3]) - 4) << 2; + mask |= (INTVAL (operands[4]) - 4) << 4; + mask |= (INTVAL (operands[5]) - 4) << 6; + operands[2] = GEN_INT (mask); + + return "pshufhw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "sse2_loadd" + [(set (match_operand:V4SI 0 "register_operand" "") + (vec_merge:V4SI + (vec_duplicate:V4SI + (match_operand:SI 1 "nonimmediate_operand" "")) + (match_dup 2) + (const_int 1)))] + "TARGET_SSE" + "operands[2] = CONST0_RTX (V4SImode);") + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn "sse2_loadld" + [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x") + (vec_merge:V4SI + (vec_duplicate:V4SI + (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x")) + (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0") + (const_int 1)))] + "TARGET_SSE" + "@ + movd\t{%2, %0|%0, %2} + movd\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI,TI,V4SF,SF")]) +;; APPLE LOCAL end 5612787 mainline sse4 + +;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must +;; be taken into account, and movdi isn't fully populated even without. +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_insn_and_split "sse2_stored" + [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r") + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x,Yi") + (parallel [(const_int 0)])))] + "TARGET_SSE" + "#" + "&& reload_completed + && (TARGET_INTER_UNIT_MOVES + || MEM_P (operands [0]) + || !GENERAL_REGNO_P (true_regnum (operands [0])))" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = gen_rtx_REG (SImode, REGNO (operands[1])); +}) +;; APPLE LOCAL end 5612787 mainline sse4 + +(define_expand "sse_storeq" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "TARGET_SSE" + "") + +;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must +;; be taken into account, and movdi isn't fully populated even without. +(define_insn "*sse2_storeq" + [(set (match_operand:DI 0 "nonimmediate_operand" "=mx") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "TARGET_SSE && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = gen_rtx_REG (DImode, REGNO (operands[1])); +}) + +(define_insn "*vec_extractv2di_1_sse2" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DI + (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movhps\t{%1, %0|%0, %1} + psrldq\t{$8, %0|%0, 8} + movq\t{%H1, %0|%0, %H1}" + [(set_attr "type" "ssemov,sseishft,ssemov") + (set_attr "memory" "*,none,*") + (set_attr "mode" "V2SF,TI,TI")]) + +;; Not sure this is ever used, but it doesn't hurt to have it. 
-aoliva +(define_insn "*vec_extractv2di_1_sse" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DI + (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o") + (parallel [(const_int 1)])))] + "!TARGET_SSE2 && TARGET_SSE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movhps\t{%1, %0|%0, %1} + movhlps\t{%1, %0|%0, %1} + movlps\t{%H1, %0|%0, %H1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + +(define_insn "*vec_dupv4si" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V4SI 0 "register_operand" "=Yt,x") + (vec_duplicate:V4SI + (match_operand:SI 1 "register_operand" " Yt,0")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE" + "@ + pshufd\t{$0, %1, %0|%0, %1, 0} + shufps\t{$0, %0, %0|%0, %0, 0}" + [(set_attr "type" "sselog1") + (set_attr "mode" "TI,V4SF")]) + +(define_insn "*vec_dupv2di" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V2DI 0 "register_operand" "=Yt,x") + (vec_duplicate:V2DI + (match_operand:DI 1 "register_operand" " 0 ,0")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE" + "@ + punpcklqdq\t%0, %0 + movlhps\t%0, %0" + [(set_attr "type" "sselog1,ssemov") + (set_attr "mode" "TI,V4SF")]) + +;; ??? In theory we can match memory for the MMX alternative, but allowing +;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE +;; alternatives pretty much forces the MMX alternative to be chosen. +(define_insn "*sse2_concatv2si" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y") + (vec_concat:V2SI + (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm") + (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE2" + "@ + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1} + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") + (set_attr "mode" "TI,TI,DI,DI")]) + +(define_insn "*sse1_concatv2si" + [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y") + (vec_concat:V2SI + (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm") + (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))] + "TARGET_SSE" + "@ + unpcklps\t{%2, %0|%0, %2} + movss\t{%1, %0|%0, %1} + punpckldq\t{%2, %0|%0, %2} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") + (set_attr "mode" "V4SF,V4SF,DI,DI")]) + +(define_insn "*vec_concatv4si_1" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x") + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" " 0 ,0,0") + (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE" + "@ + punpcklqdq\t{%2, %0|%0, %2} + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "mode" "TI,V4SF,V2SF")]) + +(define_insn "*vec_concatv2di" +;; APPLE LOCAL begin 5612787 mainline sse4 + [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x") + (vec_concat:V2DI + (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m") + /* APPLE LOCAL mainline */ + (match_operand:DI 2 "nonimmediate_or_0_operand" " C, C,Yt,x,m,0")))] +;; APPLE LOCAL end 5612787 mainline sse4 + "TARGET_SSE" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + punpcklqdq\t{%2, %0|%0, %2} + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" 
"ssemov,ssemov,sselog,ssemov,ssemov,ssemov") + (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")]) + +(define_expand "vec_setv2di" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv2di" + [(match_operand:DI 0 "register_operand" "") + (match_operand:V2DI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv2di" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_setv4si" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv4si" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv4si" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_setv8hi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv8hi" + [(match_operand:HI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv8hi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_setv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:QI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv16qi" + [(match_operand:QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + ix86_expand_vector_extract (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_initv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (false, operands[0], operands[1]); + DONE; +}) + +;; APPLE LOCAL begin 5612787 mainline sse4 +(define_expand "vec_unpacku_hi_v16qi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, true); + else + ix86_expand_sse_unpack (operands, true, true); + DONE; +}) + +(define_expand 
"vec_unpacks_hi_v16qi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, true); + else + ix86_expand_sse_unpack (operands, false, true); + DONE; +}) + +(define_expand "vec_unpacku_lo_v16qi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, false); + else + ix86_expand_sse_unpack (operands, true, false); + DONE; +}) + +(define_expand "vec_unpacks_lo_v16qi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, false); + else + ix86_expand_sse_unpack (operands, false, false); + DONE; +}) + +(define_expand "vec_unpacku_hi_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, true); + else + ix86_expand_sse_unpack (operands, true, true); + DONE; +}) + +(define_expand "vec_unpacks_hi_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, true); + else + ix86_expand_sse_unpack (operands, false, true); + DONE; +}) + +(define_expand "vec_unpacku_lo_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, false); + else + ix86_expand_sse_unpack (operands, true, false); + DONE; +}) + +(define_expand "vec_unpacks_lo_v8hi" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, false); + else + ix86_expand_sse_unpack (operands, false, false); + DONE; +}) + +(define_expand "vec_unpacku_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, true); + else + ix86_expand_sse_unpack (operands, true, true); + DONE; +}) + +(define_expand "vec_unpacks_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, true); + else + ix86_expand_sse_unpack (operands, false, true); + DONE; +}) + +(define_expand "vec_unpacku_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, false); + else + ix86_expand_sse_unpack (operands, true, false); + DONE; +}) + +(define_expand "vec_unpacks_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "")] + "TARGET_SSE2" +{ + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, false); + else + ix86_expand_sse_unpack (operands, false, false); + DONE; +}) +;; APPLE LOCAL end 5612787 mainline sse4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Miscellaneous +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_uavgv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + 
(truncate:V16QI + (lshiftrt:V16HI + (plus:V16HI + (plus:V16HI + (zero_extend:V16HI + (match_operand:V16QI 1 "nonimmediate_operand" "%0")) + (zero_extend:V16HI + (match_operand:V16QI 2 "nonimmediate_operand" "xm"))) + (const_vector:V16QI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)" + "pavgb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse2_uavgv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (plus:V8SI + (plus:V8SI + (zero_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (zero_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +;; The correct representation for this is absolutely enormous, and +;; surely not generally useful. +(define_insn "sse2_psadbw" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + UNSPEC_PSADBW))] + "TARGET_SSE2" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse_movmskps" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE" + "movmskps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_insn "sse2_movmskpd" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "movmskpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "pmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_maskmovdqu" + [(set (match_operand:V16QI 0 "memory_operand" "") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x") + (match_dup 0)] + UNSPEC_MASKMOV))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_maskmovdqu" + [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x") + (mem:V16QI (match_dup 0))] + UNSPEC_MASKMOV))] + "TARGET_SSE2 && !TARGET_64BIT" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_maskmovdqu_rex64" + [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x") + (mem:V16QI (match_dup 0))] + UNSPEC_MASKMOV))] + "TARGET_SSE2 && TARGET_64BIT" + ;; @@@ check ordering of operands in 
intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse_ldmxcsr" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] + UNSPECV_LDMXCSR)] + "TARGET_SSE" + "ldmxcsr\t%0" + [(set_attr "type" "sse") + (set_attr "memory" "load")]) + +(define_insn "sse_stmxcsr" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] + "TARGET_SSE" + "stmxcsr\t%0" + [(set_attr "type" "sse") + (set_attr "memory" "store")]) + +(define_expand "sse_sfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*sse_sfence" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] + "TARGET_SSE || TARGET_3DNOW_A" + "sfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_insn "sse2_clflush" + [(unspec_volatile [(match_operand 0 "address_operand" "p")] + UNSPECV_CLFLUSH)] + "TARGET_SSE2" + "clflush\t%a0" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_expand "sse2_mfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] + "TARGET_SSE2" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*sse2_mfence" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] + "TARGET_SSE2" + "mfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_expand "sse2_lfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] + "TARGET_SSE2" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*sse2_lfence" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] + "TARGET_SSE2" + "lfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_insn "sse3_mwait" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c")] + UNSPECV_MWAIT)] + "TARGET_SSE3" +;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used. +;; Since 32bit register operands are implicitly zero extended to 64bit, +;; we only need to set up 32bit registers. 
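+;; (Writing a 32-bit register clears bits 63:32 of the full register,
+;; so e.g. "movl %esi, %ecx" already leaves the zero-extended value
+;; that "mwait %rax,%rcx" sees in %rcx.)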
+ "mwait" + [(set_attr "length" "3")]) + +(define_insn "sse3_monitor" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_MONITOR)] + "TARGET_SSE3 && !TARGET_64BIT" + "monitor\t%0, %1, %2" + [(set_attr "length" "3")]) +; APPLE LOCAL begin mainline +;; SSSE3 +(define_insn "ssse3_phaddwv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phaddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phaddwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phadddv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn 
"ssse3_phadddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSSE3" + "phaddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phaddswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phaddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phaddswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubwv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + 
(vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phsubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubdv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubdv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSSE3" + "phsubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + 
(vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phsubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pmaddubswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ss_plus:V8HI + (mult:V8HI + (zero_extend:V8HI + (vec_select:V4QI + ;; APPLE LOCAL 6025404 + (match_operand:V16QI 1 "nonimmediate_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)])))) + (mult:V8HI + (zero_extend:V8HI + (vec_select:V16QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))) + (sign_extend:V8HI + (vec_select:V16QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))))))] + "TARGET_SSSE3" + "pmaddubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pmaddubswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI + (mult:V4HI + (zero_extend:V4HI + (vec_select:V4QI + ;; APPLE LOCAL 6025404 + (match_operand:V8QI 1 "nonimmediate_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4HI + (zero_extend:V4HI + 
(vec_select:V8QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))))] + "TARGET_SSSE3" + "pmaddubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pmulhrswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (plus:V8SI + (lshiftrt:V8SI + (mult:V8SI + (sign_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 14)) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "pmulhrsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pmulhrswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (lshiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 14)) + (const_vector:V4HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhrsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pshufbv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + UNSPEC_PSHUFB))] + "TARGET_SSSE3" + "pshufb\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pshufbv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSHUFB))] + "TARGET_SSSE3" + "pshufb\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_psign<mode>3" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] + UNSPEC_PSIGN))] + "TARGET_SSSE3" + "psign<ssevecsize>\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "ssse3_psign<mode>3" + [(set (match_operand:MMXMODE124 0 "register_operand" "=y") + (unspec:MMXMODE124 [(match_operand:MMXMODE124 1 "register_operand" "0") + (match_operand:MMXMODE124 2 "nonimmediate_operand" "ym")] + UNSPEC_PSIGN))] + "TARGET_SSSE3" + "psign<mmxvecsize>\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +(define_insn "ssse3_palignrti" + [(set (match_operand:TI 0 "register_operand" "=x") + (unspec:TI [(match_operand:TI 1 "register_operand" "0") + (match_operand:TI 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + UNSPEC_PALIGNR))] + "TARGET_SSSE3" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" 
"TI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "ssse3_palignrv1di" + [(set (match_operand:V1DI 0 "register_operand" "=y") + (unspec:V1DI [(match_operand:V1DI 1 "register_operand" "0") + (match_operand:V1DI 2 "nonimmediate_operand" "ym") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + UNSPEC_PALIGNR))] + "TARGET_SSSE3" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 + +(define_insn "ssse3_pabs<mode>2" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] + "TARGET_SSSE3" + "pabs<ssevecsize>\t{%1, %0|%0, %1}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +;; APPLE LOCAL begin 4656532 use V1DImode for _m64 +(define_insn "ssse3_pabs<mode>2" + [(set (match_operand:MMXMODE124 0 "register_operand" "=y") + (abs:MMXMODE124 (match_operand:MMXMODE124 1 "nonimmediate_operand" "ym")))] + "TARGET_SSSE3" + "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) +;; APPLE LOCAL end 4656532 use V1DImode for _m64 +; APPLE LOCAL end mainline + +(define_insn "sse3_monitor64" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_MONITOR)] + "TARGET_SSE3 && TARGET_64BIT" +;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in +;; RCX and RDX are used. Since 32bit register operands are implicitly +;; zero extended to 64bit, we only need to set up 32bit registers. + "monitor" + [(set_attr "length" "3")]) + +;; APPLE LOCAL begin 5612787 mainline sse4 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; AMD SSE4A instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse4a_vmmovntv2df" + [(set (match_operand:DF 0 "memory_operand" "=m") + (unspec:DF [(vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))] + UNSPEC_MOVNT))] + "TARGET_SSE4A" + "movntsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "DF")]) + +(define_insn "sse4a_movntdf" + [(set (match_operand:DF 0 "memory_operand" "=m") + (unspec:DF [(match_operand:DF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE4A" + "movntsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "DF")]) + +(define_insn "sse4a_vmmovntv4sf" + [(set (match_operand:SF 0 "memory_operand" "=m") + (unspec:SF [(vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))] + UNSPEC_MOVNT))] + "TARGET_SSE4A" + "movntss\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +(define_insn "sse4a_movntsf" + [(set (match_operand:SF 0 "memory_operand" "=m") + (unspec:SF [(match_operand:SF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE4A" + "movntss\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +(define_insn "sse4a_extrqi" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")] + UNSPEC_EXTRQI))] + "TARGET_SSE4A" + "extrq\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + 
+(define_insn "sse4a_extrq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_EXTRQ))] + "TARGET_SSE4A" + "extrq\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4a_insertqi" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "x") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")] + UNSPEC_INSERTQI))] + "TARGET_SSE4A" + "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" + [(set_attr "type" "sseins") + (set_attr "prefix_rep" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4a_insertq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "x")] + UNSPEC_INSERTQ))] + "TARGET_SSE4A" + "insertq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseins") + (set_attr "prefix_rep" "1") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Intel SSE4.1 instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse4_1_blendpd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:SI 3 "const_0_to_3_operand" "n")))] + "TARGET_SSE4_1" + "blendpd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V2DF")]) + +(define_insn "sse4_1_blendps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:SI 3 "const_0_to_15_operand" "n")))] + "TARGET_SSE4_1" + "blendps\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse4_1_blendvpd" + [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0") + (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm") + (match_operand:V2DF 3 "register_operand" "Y0")] + UNSPEC_BLENDV))] + "TARGET_SSE4_1" + "blendvpd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V2DF")]) + +(define_insn "sse4_1_blendvps" + [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0") + (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm") + (match_operand:V4SF 3 "register_operand" "Y0")] + UNSPEC_BLENDV))] + "TARGET_SSE4_1" + "blendvps\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse4_1_dppd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_DP))] + "TARGET_SSE4_1" + "dppd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemul") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V2DF")]) + +(define_insn "sse4_1_dpps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 
"nonimmediate_operand" "%0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_DP))] + "TARGET_SSE4_1" + "dpps\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemul") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse4_1_movntdqa" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")] + UNSPEC_MOVNTDQA))] + "TARGET_SSE4_1" + "movntdqa\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_mpsadbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_MPSADBW))] + "TARGET_SSE4_1" + "mpsadbw\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_packusdw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (us_truncate:V4HI + (match_operand:V4SI 1 "register_operand" "0")) + (us_truncate:V4HI + (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE4_1" + "packusdw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_pblendvb" + [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0") + (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm") + (match_operand:V16QI 3 "register_operand" "Y0")] + UNSPEC_BLENDV))] + "TARGET_SSE4_1" + "pblendvb\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_pblendw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:SI 3 "const_0_to_255_operand" "n")))] + "TARGET_SSE4_1" + "pblendw\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_phminposuw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] + UNSPEC_PHMINPOSUW))] + "TARGET_SSE4_1" + "phminposuw\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_extendv8qiv8hi2" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)]))))] + "TARGET_SSE4_1" + "pmovsxbw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_extendv8qiv8hi2" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (sign_extend:V8HI + (vec_select:V8QI + (vec_duplicate:V16QI + (match_operand:V8QI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)]))))] + "TARGET_SSE4_1" + "pmovsxbw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + 
+(define_insn "sse4_1_extendv4qiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (sign_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovsxbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_extendv4qiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (sign_extend:V4SI + (vec_select:V4QI + (vec_duplicate:V16QI + (match_operand:V4QI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovsxbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_extendv2qiv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (sign_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovsxbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_extendv2qiv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (sign_extend:V2DI + (vec_select:V2QI + (vec_duplicate:V16QI + (match_operand:V2QI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovsxbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_extendv4hiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovsxwd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_extendv4hiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (sign_extend:V4SI + (vec_select:V4HI + (vec_duplicate:V8HI + (match_operand:V2HI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovsxwd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_extendv2hiv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (sign_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovsxwq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_extendv2hiv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (sign_extend:V2DI + (vec_select:V2HI + (vec_duplicate:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovsxwq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_extendv2siv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovsxdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr 
"mode" "TI")]) + +(define_insn "*sse4_1_extendv2siv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (sign_extend:V2DI + (vec_select:V2SI + (vec_duplicate:V4SI + (match_operand:V2SI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovsxdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_zero_extendv8qiv8hi2" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)]))))] + "TARGET_SSE4_1" + "pmovzxbw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_zero_extendv8qiv8hi2" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (zero_extend:V8HI + (vec_select:V8QI + (vec_duplicate:V16QI + (match_operand:V8QI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)]))))] + "TARGET_SSE4_1" + "pmovzxbw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_zero_extendv4qiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (zero_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovzxbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_zero_extendv4qiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (zero_extend:V4SI + (vec_select:V4QI + (vec_duplicate:V16QI + (match_operand:V4QI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovzxbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_zero_extendv2qiv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (zero_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovzxbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_zero_extendv2qiv2di2" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (zero_extend:V2DI + (vec_select:V2QI + (vec_duplicate:V16QI + (match_operand:V2QI 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE4_1" + "pmovzxbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_zero_extendv4hiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "x") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_SSE4_1" + "pmovzxwd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "*sse4_1_zero_extendv4hiv4si2" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (zero_extend:V4SI + (vec_select:V4HI + 
(vec_duplicate:V8HI
+	      (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
+	    (parallel [(const_int 0)
+		       (const_int 1)
+		       (const_int 2)
+		       (const_int 3)]))))]
+  "TARGET_SSE4_1"
+  "pmovzxwd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2hiv2di2"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(zero_extend:V2DI
+	  (vec_select:V2HI
+	    (match_operand:V8HI 1 "register_operand" "x")
+	    (parallel [(const_int 0)
+		       (const_int 1)]))))]
+  "TARGET_SSE4_1"
+  "pmovzxwq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2hiv2di2"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(zero_extend:V2DI
+	  (vec_select:V2HI
+	    (vec_duplicate:V8HI
+	      (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
+	    (parallel [(const_int 0)
+		       (const_int 1)]))))]
+  "TARGET_SSE4_1"
+  "pmovzxwq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2siv2di2"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(zero_extend:V2DI
+	  (vec_select:V2SI
+	    (match_operand:V4SI 1 "register_operand" "x")
+	    (parallel [(const_int 0)
+		       (const_int 1)]))))]
+  "TARGET_SSE4_1"
+  "pmovzxdq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2siv2di2"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(zero_extend:V2DI
+	  (vec_select:V2SI
+	    (vec_duplicate:V4SI
+	      (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+	    (parallel [(const_int 0)
+		       (const_int 1)]))))]
+  "TARGET_SSE4_1"
+  "pmovzxdq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "TI")])
+
+;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
+;; But it is not really a compare instruction.
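+;; Roughly (an illustrative note): ptest sets ZF when (op0 & op1) == 0 and
+;; CF when (~op0 & op1) == 0, producing no XMM result, which is why it is
+;; modeled as an unspec that only sets FLAGS_REG.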
+(define_insn "sse4_1_ptest" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:V2DI 0 "register_operand" "x") + (match_operand:V2DI 1 "nonimmediate_operand" "xm")] + UNSPEC_PTEST))] + "TARGET_SSE4_1" + "ptest\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_roundpd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_SSE4_1" + "roundpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V2DF")]) + +(define_insn "sse4_1_roundps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_SSE4_1" + "roundps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +(define_insn "sse4_1_roundsd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_ROUND) + (match_operand:V2DF 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE4_1" + "roundsd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V2DF")]) + +(define_insn "sse4_1_roundss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_ROUND) + (match_operand:V4SF 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE4_1" + "roundss\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "V4SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Intel SSE4.2 string/text processing instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_and_split "sse4_2_pcmpestr" + [(set (match_operand:SI 0 "register_operand" "=c,c") + (unspec:SI + [(match_operand:V16QI 2 "register_operand" "x,x") + (match_operand:SI 3 "register_operand" "a,a") + (match_operand:V16QI 4 "nonimmediate_operand" "x,m") + (match_operand:SI 5 "register_operand" "d,d") + (match_operand:SI 6 "const_0_to_255_operand" "n,n")] + UNSPEC_PCMPESTR)) + (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0") + (unspec:V16QI + [(match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5) + (match_dup 6)] + UNSPEC_PCMPESTR)) + (set (reg:CC FLAGS_REG) + (unspec:CC + [(match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5) + (match_dup 6)] + UNSPEC_PCMPESTR))] + "TARGET_SSE4_2 + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); + int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); + int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); + + if (ecx) + emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], + operands[3], operands[4], + operands[5], operands[6])); + if (xmm0) + emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], + operands[3], operands[4], + operands[5], operands[6])); + if (flags && !(ecx || xmm0)) + emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], 
operands[3], + operands[4], operands[5], + operands[6])); + DONE; +} + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load") + (set_attr "mode" "TI")]) + +(define_insn "sse4_2_pcmpestri" + [(set (match_operand:SI 0 "register_operand" "=c,c") + (unspec:SI + [(match_operand:V16QI 1 "register_operand" "x,x") + (match_operand:SI 2 "register_operand" "a,a") + (match_operand:V16QI 3 "nonimmediate_operand" "x,m") + (match_operand:SI 4 "register_operand" "d,d") + (match_operand:SI 5 "const_0_to_255_operand" "n,n")] + UNSPEC_PCMPESTR)) + (set (reg:CC FLAGS_REG) + (unspec:CC + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5)] + UNSPEC_PCMPESTR))] + "TARGET_SSE4_2" + "pcmpestri\t{%5, %3, %1|%1, %3, %5}" + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load") + (set_attr "mode" "TI")]) + +(define_insn "sse4_2_pcmpestrm" + [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0") + (unspec:V16QI + [(match_operand:V16QI 1 "register_operand" "x,x") + (match_operand:SI 2 "register_operand" "a,a") + (match_operand:V16QI 3 "nonimmediate_operand" "x,m") + (match_operand:SI 4 "register_operand" "d,d") + (match_operand:SI 5 "const_0_to_255_operand" "n,n")] + UNSPEC_PCMPESTR)) + (set (reg:CC FLAGS_REG) + (unspec:CC + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (match_dup 4) + (match_dup 5)] + UNSPEC_PCMPESTR))] + "TARGET_SSE4_2" + "pcmpestrm\t{%5, %3, %1|%1, %3, %5}" + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load") + (set_attr "mode" "TI")]) + +(define_insn "sse4_2_pcmpestr_cconly" + [(set (reg:CC FLAGS_REG) + (unspec:CC + [(match_operand:V16QI 0 "register_operand" "x,x,x,x") + (match_operand:SI 1 "register_operand" "a,a,a,a") + (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m") + (match_operand:SI 3 "register_operand" "d,d,d,d") + (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")] + UNSPEC_PCMPESTR)) + (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X")) + (clobber (match_scratch:SI 6 "= X, X,c,c"))] + "TARGET_SSE4_2" + "@ + pcmpestrm\t{%4, %2, %0|%0, %2, %4} + pcmpestrm\t{%4, %2, %0|%0, %2, %4} + pcmpestri\t{%4, %2, %0|%0, %2, %4} + pcmpestri\t{%4, %2, %0|%0, %2, %4}" + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load,none,load") + (set_attr "mode" "TI")]) + +(define_insn_and_split "sse4_2_pcmpistr" + [(set (match_operand:SI 0 "register_operand" "=c,c") + (unspec:SI + [(match_operand:V16QI 2 "register_operand" "x,x") + (match_operand:V16QI 3 "nonimmediate_operand" "x,m") + (match_operand:SI 4 "const_0_to_255_operand" "n,n")] + UNSPEC_PCMPISTR)) + (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0") + (unspec:V16QI + [(match_dup 2) + (match_dup 3) + (match_dup 4)] + UNSPEC_PCMPISTR)) + (set (reg:CC FLAGS_REG) + (unspec:CC + [(match_dup 2) + (match_dup 3) + (match_dup 4)] + UNSPEC_PCMPISTR))] + "TARGET_SSE4_2 + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); + int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); + int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); + + if (ecx) + emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], + operands[3], operands[4])); + if (xmm0) + emit_insn (gen_sse4_2_pcmpistrm (operands[1], 
operands[2], + operands[3], operands[4])); + if (flags && !(ecx || xmm0)) + emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3], + operands[4])); + DONE; +} + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load") + (set_attr "mode" "TI")]) + +(define_insn "sse4_2_pcmpistri" + [(set (match_operand:SI 0 "register_operand" "=c,c") + (unspec:SI + [(match_operand:V16QI 1 "register_operand" "x,x") + (match_operand:V16QI 2 "nonimmediate_operand" "x,m") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + UNSPEC_PCMPISTR)) + (set (reg:CC FLAGS_REG) + (unspec:CC + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMPISTR))] + "TARGET_SSE4_2" + "pcmpistri\t{%3, %2, %1|%1, %2, %3}" + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load") + (set_attr "mode" "TI")]) + +(define_insn "sse4_2_pcmpistrm" + [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0") + (unspec:V16QI + [(match_operand:V16QI 1 "register_operand" "x,x") + (match_operand:V16QI 2 "nonimmediate_operand" "x,m") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + UNSPEC_PCMPISTR)) + (set (reg:CC FLAGS_REG) + (unspec:CC + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMPISTR))] + "TARGET_SSE4_2" + "pcmpistrm\t{%3, %2, %1|%1, %2, %3}" + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load") + (set_attr "mode" "TI")]) + +(define_insn "sse4_2_pcmpistr_cconly" + [(set (reg:CC FLAGS_REG) + (unspec:CC + [(match_operand:V16QI 0 "register_operand" "x,x,x,x") + (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m") + (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")] + UNSPEC_PCMPISTR)) + (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X")) + (clobber (match_scratch:SI 4 "= X, X,c,c"))] + "TARGET_SSE4_2" + "@ + pcmpistrm\t{%2, %1, %0|%0, %1, %2} + pcmpistrm\t{%2, %1, %0|%0, %1, %2} + pcmpistri\t{%2, %1, %0|%0, %1, %2} + pcmpistri\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "memory" "none,load,none,load") + (set_attr "mode" "TI")]) +;; APPLE LOCAL end 5612787 mainline sse4 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/sync.md b/gcc-4.2.1-5666.3/gcc/config/i386/sync.md new file mode 100644 index 000000000..8c2fdb230 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/sync.md @@ -0,0 +1,291 @@ +;; GCC machine description for i386 synchronization instructions. +;; Copyright (C) 2005, 2006 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. 
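+
+;; An illustrative sketch of how these patterns are reached (assuming the
+;; usual lowering of the __sync builtins onto the sync_* named patterns;
+;; the C names below are hypothetical):
+;;   long old = __sync_val_compare_and_swap (&mem, expected, desired);
+;; expands through sync_compare_and_swap<mode> into a single
+;;   lock; cmpxchg<size>
+;; with the expected/old value pinned to the "a" register by constraint.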
+
+(define_mode_macro IMODE [QI HI SI (DI "TARGET_64BIT")])
+(define_mode_attr modesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+(define_mode_attr modeconstraint [(QI "q") (HI "r") (SI "r") (DI "r")])
+(define_mode_attr immconstraint [(QI "i") (HI "i") (SI "i") (DI "e")])
+
+(define_mode_macro CASMODE [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
+			    (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_macro DCASMODE
+  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
+   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
+(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
+
+;; ??? It would be possible to use cmpxchg8b on pentium for DImode
+;; changes.  It's complicated because the insn uses ecx:ebx as the
+;; new value; note that the registers are reversed from the order
+;; that they'd be in with (reg:DI 2 ecx).  Similarly for TImode
+;; data in 64-bit mode.
+
+(define_expand "sync_compare_and_swap<mode>"
+  [(parallel
+    [(set (match_operand:CASMODE 0 "register_operand" "")
+	  (match_operand:CASMODE 1 "memory_operand" ""))
+     (set (match_dup 1)
+	  (unspec_volatile:CASMODE
+	    [(match_dup 1)
+	     (match_operand:CASMODE 2 "register_operand" "")
+	     (match_operand:CASMODE 3 "register_operand" "")]
+	    UNSPECV_CMPXCHG_1))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_CMPXCHG"
+{
+  if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+    {
+      enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
+      rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
+      rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
+				      GET_MODE_SIZE (hmode));
+      low = force_reg (hmode, low);
+      high = force_reg (hmode, high);
+      if (<MODE>mode == DImode)
+	emit_insn (gen_sync_double_compare_and_swapdi
+		   (operands[0], operands[1], operands[2], low, high));
+      else if (<MODE>mode == TImode)
+	emit_insn (gen_sync_double_compare_and_swapti
+		   (operands[0], operands[1], operands[2], low, high));
+      else
+	gcc_unreachable ();
+      DONE;
+    }
+})
+
+(define_insn "*sync_compare_and_swap<mode>"
+  [(set (match_operand:IMODE 0 "register_operand" "=a")
+	(match_operand:IMODE 1 "memory_operand" "+m"))
+   (set (match_dup 1)
+	(unspec_volatile:IMODE
+	  [(match_dup 1)
+	   (match_operand:IMODE 2 "register_operand" "a")
+	   (match_operand:IMODE 3 "register_operand" "<modeconstraint>")]
+	  UNSPECV_CMPXCHG_1))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMPXCHG"
+  "lock\;cmpxchg{<modesuffix>}\t{%3, %1|%1, %3}")
+
+(define_insn "sync_double_compare_and_swap<mode>"
+  [(set (match_operand:DCASMODE 0 "register_operand" "=A")
+	(match_operand:DCASMODE 1 "memory_operand" "+m"))
+   (set (match_dup 1)
+	(unspec_volatile:DCASMODE
+	  [(match_dup 1)
+	   (match_operand:DCASMODE 2 "register_operand" "A")
+	   (match_operand:<DCASHMODE> 3 "register_operand" "b")
+	   (match_operand:<DCASHMODE> 4 "register_operand" "c")]
+	  UNSPECV_CMPXCHG_1))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "lock\;cmpxchg<doublemodesuffix>b\t%1")
+
+;; Theoretically we'd like to use constraint "r" (any reg) for operand
+;; 3, but that includes ecx.  If operands 3 and 4 are the same (like when
+;; the input is -1LL) GCC might choose to allocate operand 3 to ecx, like
+;; operand 4.  This breaks, as the xchg will move the PIC register contents
+;; to %ecx then --> boom.  Operands 3 and 4 really need to be different
+;; registers, which in this case means operand 3 must not be ecx.
+;; Instead of playing tricks with fake early clobbers or the like we +;; just enumerate all regs possible here, which (as this is !TARGET_64BIT) +;; are just esi and edi. +(define_insn "*sync_double_compare_and_swapdi_pic" + [(set (match_operand:DI 0 "register_operand" "=A") + (match_operand:DI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec_volatile:DI + [(match_dup 1) + (match_operand:DI 2 "register_operand" "A") + (match_operand:SI 3 "register_operand" "SD") + (match_operand:SI 4 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic" + "xchg{l}\t%%ebx, %3\;lock\;cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3") + +(define_expand "sync_compare_and_swap_cc<mode>" + [(parallel + [(set (match_operand:CASMODE 0 "register_operand" "") + (match_operand:CASMODE 1 "memory_operand" "")) + (set (match_dup 1) + (unspec_volatile:CASMODE + [(match_dup 1) + (match_operand:CASMODE 2 "register_operand" "") + (match_operand:CASMODE 3 "register_operand" "")] + UNSPECV_CMPXCHG_1)) + (set (match_dup 4) + (compare:CCZ + (unspec_volatile:CASMODE + [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG_2) + (match_dup 2)))])] + "TARGET_CMPXCHG" +{ + operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG); + ix86_compare_op0 = operands[3]; + ix86_compare_op1 = NULL; + ix86_compare_emitted = operands[4]; + if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode) + { + enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode; + rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0); + rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode, + GET_MODE_SIZE (hmode)); + low = force_reg (hmode, low); + high = force_reg (hmode, high); + if (<MODE>mode == DImode) + emit_insn (gen_sync_double_compare_and_swap_ccdi + (operands[0], operands[1], operands[2], low, high)); + else if (<MODE>mode == TImode) + emit_insn (gen_sync_double_compare_and_swap_ccti + (operands[0], operands[1], operands[2], low, high)); + else + gcc_unreachable (); + DONE; + } +}) + +(define_insn "*sync_compare_and_swap_cc<mode>" + [(set (match_operand:IMODE 0 "register_operand" "=a") + (match_operand:IMODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec_volatile:IMODE + [(match_dup 1) + (match_operand:IMODE 2 "register_operand" "a") + (match_operand:IMODE 3 "register_operand" "<modeconstraint>")] + UNSPECV_CMPXCHG_1)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec_volatile:IMODE + [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG_2) + (match_dup 2)))] + "TARGET_CMPXCHG" + "lock\;cmpxchg{<modesuffix>}\t{%3, %1|%1, %3}") + +(define_insn "sync_double_compare_and_swap_cc<mode>" + [(set (match_operand:DCASMODE 0 "register_operand" "=A") + (match_operand:DCASMODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec_volatile:DCASMODE + [(match_dup 1) + (match_operand:DCASMODE 2 "register_operand" "A") + (match_operand:<DCASHMODE> 3 "register_operand" "b") + (match_operand:<DCASHMODE> 4 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec_volatile:DCASMODE + [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] + UNSPECV_CMPXCHG_2) + (match_dup 2)))] + "" + "lock\;cmpxchg<doublemodesuffix>b\t%1") + +;; See above for the explanation of using the constraint "SD" for +;; operand 3. 
+(define_insn "*sync_double_compare_and_swap_ccdi_pic" + [(set (match_operand:DI 0 "register_operand" "=A") + (match_operand:DI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec_volatile:DI + [(match_dup 1) + (match_operand:DI 2 "register_operand" "A") + (match_operand:SI 3 "register_operand" "SD") + (match_operand:SI 4 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec_volatile:DI + [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] + UNSPECV_CMPXCHG_2) + (match_dup 2)))] + "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic" + "xchg{l}\t%%ebx, %3\;lock\;cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3") + +(define_insn "sync_old_add<mode>" + [(set (match_operand:IMODE 0 "register_operand" "=<modeconstraint>") + (unspec_volatile:IMODE + [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG)) + (set (match_dup 1) + (plus:IMODE (match_dup 1) + (match_operand:IMODE 2 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_XADD" + "lock\;xadd{<modesuffix>}\t{%0, %1|%1, %0}") + +;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space. +(define_insn "sync_lock_test_and_set<mode>" + [(set (match_operand:IMODE 0 "register_operand" "=<modeconstraint>") + (unspec_volatile:IMODE + [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG)) + (set (match_dup 1) + (match_operand:IMODE 2 "register_operand" "0"))] + "" + "xchg{<modesuffix>}\t{%1, %0|%0, %1}") + +(define_insn "sync_add<mode>" + [(set (match_operand:IMODE 0 "memory_operand" "+m") + (unspec_volatile:IMODE + [(plus:IMODE (match_dup 0) + (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))] + UNSPECV_LOCK)) + (clobber (reg:CC FLAGS_REG))] + "" + "lock\;add{<modesuffix>}\t{%1, %0|%0, %1}") + +(define_insn "sync_sub<mode>" + [(set (match_operand:IMODE 0 "memory_operand" "+m") + (unspec_volatile:IMODE + [(minus:IMODE (match_dup 0) + (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))] + UNSPECV_LOCK)) + (clobber (reg:CC FLAGS_REG))] + "" + "lock\;sub{<modesuffix>}\t{%1, %0|%0, %1}") + +(define_insn "sync_ior<mode>" + [(set (match_operand:IMODE 0 "memory_operand" "+m") + (unspec_volatile:IMODE + [(ior:IMODE (match_dup 0) + (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))] + UNSPECV_LOCK)) + (clobber (reg:CC FLAGS_REG))] + "" + "lock\;or{<modesuffix>}\t{%1, %0|%0, %1}") + +(define_insn "sync_and<mode>" + [(set (match_operand:IMODE 0 "memory_operand" "+m") + (unspec_volatile:IMODE + [(and:IMODE (match_dup 0) + (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))] + UNSPECV_LOCK)) + (clobber (reg:CC FLAGS_REG))] + "" + "lock\;and{<modesuffix>}\t{%1, %0|%0, %1}") + +(define_insn "sync_xor<mode>" + [(set (match_operand:IMODE 0 "memory_operand" "+m") + (unspec_volatile:IMODE + [(xor:IMODE (match_dup 0) + (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))] + UNSPECV_LOCK)) + (clobber (reg:CC FLAGS_REG))] + "" + "lock\;xor{<modesuffix>}\t{%1, %0|%0, %1}") diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin new file mode 100644 index 000000000..5e6df6912 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin @@ -0,0 +1,22 @@ +SHLIB_VERPFX = $(srcdir)/config/i386/darwin-libgcc +# APPLE LOCAL begin 4099000 +LIB1ASMSRC = i386/lib1funcs.asm +LIB1ASMFUNCS = _get_pc_thunk_ax _get_pc_thunk_dx _get_pc_thunk_cx _get_pc_thunk_bx _get_pc_thunk_si _get_pc_thunk_di _get_pc_thunk_bp +# APPLE LOCAL 
end 4099000 +# APPLE LOCAL avoid try fat on thin system +ifneq ($(shell lipo -info /usr/lib/libSystem.B.dylib | grep x86_64),) +MULTILIB_OPTIONS = m64 +MULTILIB_DIRNAMES = x86_64 +# APPLE LOCAL avoid try fat on thin system +endif +LIB2_SIDITI_CONV_FUNCS=yes +LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c + +# APPLE LOCAL begin gcov 5573505 +# -pipe because there's an assembler bug, 4077127, which causes +# it to not properly process the first # directive, causing temporary +# file names to appear in stabs, causing the bootstrap to fail. Using -pipe +# works around this by not having any temporary file names. +TARGET_LIBGCC2_CFLAGS = -fPIC -pipe +TARGET_LIBGCC2_STATIC_CFLAGS = -mmacosx-version-min=10.4 +# APPLE LOCAL end gcov 5573505 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64 b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64 new file mode 100644 index 000000000..3670a125b --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64 @@ -0,0 +1,12 @@ +SHLIB_VERPFX = $(srcdir)/config/i386/darwin-libgcc +LIB2_SIDITI_CONV_FUNCS=yes +LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c + +# APPLE LOCAL begin gcov 5573505 +# -pipe because there's an assembler bug, 4077127, which causes +# it to not properly process the first # directive, causing temporary +# file names to appear in stabs, causing the bootstrap to fail. Using -pipe +# works around this by not having any temporary file names. +TARGET_LIBGCC2_CFLAGS = -fPIC -pipe +TARGET_LIBGCC2_STATIC_CFLAGS = -mmacosx-version-min=10.4 +# APPLE LOCAL end gcov 5573505 diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc b/gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc new file mode 100644 index 000000000..c37f8a759 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc @@ -0,0 +1,6 @@ +# Install gmm_malloc.h as mm_malloc.h. + +EXTRA_HEADERS += mm_malloc.h +mm_malloc.h: $(srcdir)/config/i386/gmm_malloc.h + rm -f $@ + cat $^ > $@ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h new file mode 100644 index 000000000..1bb254bfe --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h @@ -0,0 +1,304 @@ +/* APPLE LOCAL file ssse3 4424835 */ +/* Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.1. 
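+   (For a concrete illustration of the horizontal operations declared
+   below, _mm_hadd_epi16 returns the pairwise sums
+     { __X[0]+__X[1], __X[2]+__X[3], __X[4]+__X[5], __X[6]+__X[7],
+       __Y[0]+__Y[1], __Y[2]+__Y[3], __Y[4]+__Y[5], __Y[6]+__Y[7] };
+   an illustrative note, not text from the Intel reference.)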
*/ + +#ifndef _TMMINTRIN_H_INCLUDED +#define _TMMINTRIN_H_INCLUDED + +#ifdef __SSSE3__ +#include <pmmintrin.h> + +/* APPLE LOCAL begin nodebug inline */ +#define __always_inline__ __always_inline__, __nodebug__ +/* APPLE LOCAL end nodebug inline */ + +/* APPLE LOCAL begin radar 5618945 */ +#undef __STATIC_INLINE +#ifdef __GNUC_STDC_INLINE__ +#define __STATIC_INLINE __inline +#else +#define __STATIC_INLINE static __inline +#endif +/* APPLE LOCAL end radar 5618945 */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadd_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadd_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadds_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadd_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadd_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hadds_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsub_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsub_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsub_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsub_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_hsubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ 
+__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_maddubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_maddubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mulhrs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mulhrs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_shuffle_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_shuffle_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sign_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sign_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sign_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sign_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sign_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sign_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); +} + +/* APPLE LOCAL begin 5814283 */ +#define _mm_alignr_epi8(__X, __Y, __N) \ + ((__m128i)__builtin_ia32_palignr128 ((__v2di)(__X), (__v2di)(__Y), (__N) * 8)) +/* APPLE LOCAL end 5814283 */ + +#define _mm_alignr_pi8(__X, __Y, __N) \ + ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_abs_epi8 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsb128 
((__v16qi)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_abs_epi16 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128i __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_abs_epi32 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_abs_pi8 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsb ((__v8qi)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_abs_pi16 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsw ((__v4hi)__X); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_abs_pi32 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsd ((__v2si)__X); +} + +/* APPLE LOCAL begin nodebug inline */ +#undef __always_inline__ +/* APPLE LOCAL end nodebug inline */ + +#endif /* __SSSE3__ */ + +#endif /* _TMMINTRIN_H_INCLUDED */ diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin new file mode 100644 index 000000000..025c5f4fb --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin @@ -0,0 +1,8 @@ +host-i386-darwin.o : $(srcdir)/config/i386/host-i386-darwin.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) \ + config/host-darwin.h + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< +# APPLE LOCAL begin use -mdynamic-no-pic to build x86-hosted compilers + +XCFLAGS = -mdynamic-no-pic +# APPLE LOCAL end use -mdynamic-no-pic to build x86-hosted compilers diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64 b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64 new file mode 100644 index 000000000..58e9f6753 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64 @@ -0,0 +1,8 @@ +# APPLE LOCAL file mainline +# This file should go away. +host-x86_64-darwin.o : $(srcdir)/config/i386/host-x86_64-darwin.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) \ + config/host-darwin.h + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +XCFLAGS = diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/x-i386 b/gcc-4.2.1-5666.3/gcc/config/i386/x-i386 new file mode 100644 index 000000000..e156bcde3 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/x-i386 @@ -0,0 +1,3 @@ +driver-i386.o : $(srcdir)/config/i386/driver-i386.c \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h new file mode 100644 index 000000000..ad805b866 --- /dev/null +++ b/gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h @@ -0,0 +1,1582 @@ +/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */ +/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef _XMMINTRIN_H_INCLUDED +#define _XMMINTRIN_H_INCLUDED + +#ifndef __SSE__ +# error "SSE instruction set not enabled" +#else + +/* We need type definitions from the MMX header file. */ +#include <mmintrin.h> + +/* Get _mm_malloc () and _mm_free (). */ +/* APPLE LOCAL begin xmmintrin.h for kernel 4123064 */ +#if __STDC_HOSTED__ +#include <mm_malloc.h> +#endif +/* APPLE LOCAL end xmmintrin.h for kernel 4123064 */ + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* Internal data types for implementing the intrinsics. */ +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +/* Create a selector for use with the SHUFPS instruction. */ +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) + +/* Constants for use with _mm_prefetch. */ +enum _mm_hint +{ + _MM_HINT_T0 = 3, + _MM_HINT_T1 = 2, + _MM_HINT_T2 = 1, + _MM_HINT_NTA = 0 +}; + +/* Bits in the MXCSR. */ +#define _MM_EXCEPT_MASK 0x003f +#define _MM_EXCEPT_INVALID 0x0001 +#define _MM_EXCEPT_DENORM 0x0002 +#define _MM_EXCEPT_DIV_ZERO 0x0004 +#define _MM_EXCEPT_OVERFLOW 0x0008 +#define _MM_EXCEPT_UNDERFLOW 0x0010 +#define _MM_EXCEPT_INEXACT 0x0020 + +#define _MM_MASK_MASK 0x1f80 +#define _MM_MASK_INVALID 0x0080 +#define _MM_MASK_DENORM 0x0100 +#define _MM_MASK_DIV_ZERO 0x0200 +#define _MM_MASK_OVERFLOW 0x0400 +#define _MM_MASK_UNDERFLOW 0x0800 +#define _MM_MASK_INEXACT 0x1000 + +#define _MM_ROUND_MASK 0x6000 +#define _MM_ROUND_NEAREST 0x0000 +#define _MM_ROUND_DOWN 0x2000 +#define _MM_ROUND_UP 0x4000 +#define _MM_ROUND_TOWARD_ZERO 0x6000 + +#define _MM_FLUSH_ZERO_MASK 0x8000 +#define _MM_FLUSH_ZERO_ON 0x8000 +#define _MM_FLUSH_ZERO_OFF 0x0000 + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#define __always_inline__ __always_inline__, __nodebug__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +/* APPLE LOCAL begin radar 5618945 */ +#undef __STATIC_INLINE +#ifdef __GNUC_STDC_INLINE__ +#define __STATIC_INLINE __inline +#else +#define __STATIC_INLINE static __inline +#endif +/* APPLE LOCAL end radar 5618945 */ + +/* Create a vector of zeros. 
*/ +/* APPLE LOCAL begin radar 4152603 */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setzero_ps (void) +{ + return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; +} + +/* Perform the respective operation on the lower SPFP (single-precision + floating-point) values of A and B; the upper three SPFP values are + passed through from A. */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mul_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_div_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sqrt_ss (__m128 __A) +{ + return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_rcp_ss (__m128 __A) +{ + return (__m128) __builtin_ia32_rcpss ((__v4sf)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_rsqrt_ss (__m128 __A) +{ + return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B); +} + +/* Perform the respective operation on the four SPFP values in A and B. 
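+   For example (illustrative), _mm_add_ps (__A, __B) returns
+     { __A[0]+__B[0], __A[1]+__B[1], __A[2]+__B[2], __A[3]+__B[3] }.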
*/ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_add_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sub_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mul_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_div_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sqrt_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_rcp_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_rcpps ((__v4sf)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_rsqrt_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B); +} + +/* Perform logical bit-wise operations on 128-bit values. */ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_and_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_andps (__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_andnot_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_andnps (__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_or_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_orps (__A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_xor_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_xorps (__A, __B); +} + +/* Perform a comparison on the lower SPFP values of A and B. If the + comparison is true, place a mask of all ones in the result, otherwise a + mask of zeros. The upper three SPFP values are passed through from A. 
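+   For example (illustrative), element 0 of _mm_cmplt_ss (__A, __B) holds
+   the bit pattern 0xFFFFFFFF if __A[0] < __B[0] and 0x00000000 otherwise,
+   while elements 1..3 are copied from __A.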
*/ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmple_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpltss ((__v4sf) __B, + (__v4sf) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpge_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpless ((__v4sf) __B, + (__v4sf) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpneq_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnlt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnle_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpngt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpnltss ((__v4sf) __B, + (__v4sf) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnge_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpnless ((__v4sf) __B, + (__v4sf) + __A)); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpord_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpunord_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B); +} + +/* Perform a comparison on the four SPFP values of A and B. For each + element, if the comparison is true, place a mask of all ones in the + result, otherwise a mask of zeros. 
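+
+   A common use is a branch-free select; e.g. an element-wise maximum
+   could be written as (an illustrative sketch, not part of the original
+   header):
+
+     __m128 m = _mm_cmpgt_ps (a, b);              -- per-element masks
+     __m128 r = _mm_or_ps (_mm_and_ps (m, a),     -- a where a > b
+                           _mm_andnot_ps (m, b)); -- b elsewhere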
*/ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpeq_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmplt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmple_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpgt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpge_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpneq_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnlt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnle_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpngt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpnge_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpord_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cmpunord_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B); +} + +/* Compare the lower SPFP values of A and B and return 1 if true + and 0 if false. 
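+
+   These return an ordinary int, so they can drive control flow directly;
+   e.g. (an illustrative sketch; handle_less is a hypothetical function):
+
+     if (_mm_comilt_ss (a, b))   -- compares element 0 of a with element 0 of b
+       handle_less ();
+
+   The _mm_ucomi* variants below behave the same except that they do not
+   signal the invalid-operation exception for quiet NaN operands.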
*/ + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comieq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comilt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comile_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comigt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comige_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_comineq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomieq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomilt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomile_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomigt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomige_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_ucomineq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B); +} + +/* Convert the lower SPFP value to a 32-bit integer according to the current + rounding mode. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtss_si32 (__m128 __A) +{ + return __builtin_ia32_cvtss2si ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvt_ss2si (__m128 __A) +{ + return _mm_cvtss_si32 (__A); +} + +#ifdef __x86_64__ +/* Convert the lower SPFP value to a 64-bit integer according to the + current rounding mode. */ + +/* Intel intrinsic. 
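+   The conversion honors the current rounding mode; e.g. (an illustrative
+   sketch, not part of the original header):
+
+     long long n = _mm_cvtss_si64 (_mm_set_ss (2.5f));
+     -- n == 2 under the default round-to-nearest-even mode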
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtss_si64 (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtss_si64x (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); +} +#endif + +/* Convert the two lower SPFP values to 32-bit integers according to the + current rounding mode.  Return the integers in packed form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtps_pi32 (__m128 __A) +{ + return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvt_ps2pi (__m128 __A) +{ + return _mm_cvtps_pi32 (__A); +} + +/* Truncate the lower SPFP value to a 32-bit integer. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttss_si32 (__m128 __A) +{ + return __builtin_ia32_cvttss2si ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtt_ss2si (__m128 __A) +{ + return _mm_cvttss_si32 (__A); +} + +#ifdef __x86_64__ +/* Truncate the lower SPFP value to a 64-bit integer. */ + +/* Intel intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttss_si64 (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE long long __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttss_si64x (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); +} +#endif + +/* Truncate the two lower SPFP values to 32-bit integers.  Return the + integers in packed form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvttps_pi32 (__m128 __A) +{ + return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtt_ps2pi (__m128 __A) +{ + return _mm_cvttps_pi32 (__A); +} + +/* Convert B to a SPFP value and insert it as element zero in A. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi32_ss (__m128 __A, int __B) +{ + return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvt_si2ss (__m128 __A, int __B) +{ + return _mm_cvtsi32_ss (__A, __B); +} + +#ifdef __x86_64__ +/* Convert B to a SPFP value and insert it as element zero in A. */ + +/* Intel intrinsic. 
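+   E.g. (an illustrative sketch, not part of the original header):
+
+     __m128 v = _mm_cvtsi64_ss (_mm_setzero_ps (), 42LL);
+     -- v holds { 42.0f, 0.0f, 0.0f, 0.0f }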
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); +} + +/* Microsoft intrinsic. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtsi64x_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); +} +#endif + +/* Convert the two 32-bit values in B to SPFP form and insert them + as the two lower elements in A. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpi32_ps (__m128 __A, __m64 __B) +{ + return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvt_pi2ps (__m128 __A, __m64 __B) +{ + return _mm_cvtpi32_ps (__A, __B); +} + +/* Convert the four signed 16-bit values in A to SPFP form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpi16_ps (__m64 __A) +{ + __v4hi __sign; + __v2si __hisi, __losi; + __v4sf __r; + + /* This comparison against zero gives us a mask that can be used to + fill in the missing sign bits in the unpack operations below, so + that we get signed values after unpacking. */ + __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A); + + /* Convert the four words to doublewords. */ + __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign); + __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign); + + /* Convert the doublewords to floating point two at a time. */ + __r = (__v4sf) _mm_setzero_ps (); + __r = __builtin_ia32_cvtpi2ps (__r, __hisi); + __r = __builtin_ia32_movlhps (__r, __r); + __r = __builtin_ia32_cvtpi2ps (__r, __losi); + + return (__m128) __r; +} + +/* Convert the four unsigned 16-bit values in A to SPFP form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpu16_ps (__m64 __A) +{ + __v2si __hisi, __losi; + __v4sf __r; + + /* Convert the four words to doublewords. */ + __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL); + __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL); + + /* Convert the doublewords to floating point two at a time. */ + __r = (__v4sf) _mm_setzero_ps (); + __r = __builtin_ia32_cvtpi2ps (__r, __hisi); + __r = __builtin_ia32_movlhps (__r, __r); + __r = __builtin_ia32_cvtpi2ps (__r, __losi); + + return (__m128) __r; +} + +/* Convert the low four signed 8-bit values in A to SPFP form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpi8_ps (__m64 __A) +{ + __v8qi __sign; + + /* This comparison against zero gives us a mask that can be used to + fill in the missing sign bits in the unpack operations below, so + that we get signed values after unpacking. */ + __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A); + + /* Convert the four low bytes to words. 
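+     (punpcklbw interleaves the low bytes of A with the sign mask computed
+     above, which amounts to sign-extending each byte to 16 bits.)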
*/ + __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign); + + return _mm_cvtpi16_ps(__A); +} + +/* Convert the low four unsigned 8-bit values in A to SPFP form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpu8_ps(__m64 __A) +{ + __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL); + return _mm_cvtpu16_ps(__A); +} + +/* Convert the four signed 32-bit values in A and B to SPFP form. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtpi32x2_ps(__m64 __A, __m64 __B) +{ + __v4sf __zero = (__v4sf) _mm_setzero_ps (); + __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A); + __v4sf __sfb = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__B); + return (__m128) __builtin_ia32_movlhps (__sfa, __sfb); +} + +/* Convert the four SPFP values in A to four signed 16-bit integers. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtps_pi16(__m128 __A) +{ + __v4sf __hisf = (__v4sf)__A; + __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf); + __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf); + __v2si __losi = __builtin_ia32_cvtps2pi (__losf); + return (__m64) __builtin_ia32_packssdw (__hisi, __losi); +} + +/* Convert the four SPFP values in A to four signed 8-bit integers. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtps_pi8(__m128 __A) +{ + __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A); + return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL); +} + +/* Selects four specific SPFP values from A and B based on MASK. */ +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_shuffle_ps (__m128 __A, __m128 __B, int __mask) +{ + return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); +} +#else +#define _mm_shuffle_ps(A, B, MASK) \ + ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK))) +#endif + + +/* Selects and interleaves the upper two SPFP values from A and B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpackhi_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B); +} + +/* Selects and interleaves the lower two SPFP values from A and B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_unpacklo_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B); +} + +/* Sets the upper two SPFP values with 64-bits of data loaded from P; + the lower two values are passed through from A. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadh_pi (__m128 __A, __m64 const *__P) +{ + return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P); +} + +/* Stores the upper two SPFP values of A into P. 
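+   Together with _mm_loadh_pi this moves the upper half of a vector
+   through memory; e.g. (an illustrative sketch, not part of the original
+   header):
+
+     __m64 hi;
+     _mm_storeh_pi (&hi, v);      -- hi receives elements 2 and 3 of v
+     u = _mm_loadh_pi (u, &hi);   -- copy that pair into u's upper half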
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storeh_pi (__m64 *__P, __m128 __A) +{ + __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A); +} + +/* Moves the upper two values of B into the lower two values of A. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movehl_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B); +} + +/* Moves the lower two values of B into the upper two values of A. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movelh_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B); +} + +/* Sets the lower two SPFP values with 64-bits of data loaded from P; + the upper two values are passed through from A. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadl_pi (__m128 __A, __m64 const *__P) +{ + return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P); +} + +/* Stores the lower two SPFP values of A into P. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storel_pi (__m64 *__P, __m128 __A) +{ + __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A); +} + +/* Creates a 4-bit mask from the most significant bits of the SPFP values. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movemask_ps (__m128 __A) +{ + return __builtin_ia32_movmskps ((__v4sf)__A); +} + +/* Return the contents of the control register. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE unsigned int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_getcsr (void) +{ + return __builtin_ia32_stmxcsr (); +} + +/* Read exception bits from the control register. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE unsigned int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_GET_EXCEPTION_STATE (void) +{ + return _mm_getcsr() & _MM_EXCEPT_MASK; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE unsigned int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_GET_EXCEPTION_MASK (void) +{ + return _mm_getcsr() & _MM_MASK_MASK; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE unsigned int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_GET_ROUNDING_MODE (void) +{ + return _mm_getcsr() & _MM_ROUND_MASK; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE unsigned int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_GET_FLUSH_ZERO_MODE (void) +{ + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; +} + +/* Set the control register to I. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setcsr (unsigned int __I) +{ + __builtin_ia32_ldmxcsr (__I); +} + +/* Set exception bits in the control register. 
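+   Each _MM_SET_* helper below performs a read-modify-write of MXCSR via
+   _mm_getcsr and _mm_setcsr; e.g. (an illustrative sketch):
+
+     _MM_SET_ROUNDING_MODE (_MM_ROUND_TOWARD_ZERO);
+     _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);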
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_SET_EXCEPTION_STATE(unsigned int __mask) +{ + _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_SET_EXCEPTION_MASK (unsigned int __mask) +{ + _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_SET_ROUNDING_MODE (unsigned int __mode) +{ + _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode) +{ + _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode); +} + +/* Create a vector with element 0 as F and the rest zero. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_ss (float __F) +{ + return __extension__ (__m128)(__v4sf){ __F, 0, 0, 0 }; +} + +/* Create a vector with all four elements equal to F. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set1_ps (float __F) +{ + return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F }; +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_ps1 (float __F) +{ + return _mm_set1_ps (__F); +} + +/* Create a vector with element 0 as *P and the rest zero. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_ss (float const *__P) +{ + return _mm_set_ss (*__P); +} + +/* Create a vector with all four elements equal to *P. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load1_ps (float const *__P) +{ + return _mm_set1_ps (*__P); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_ps1 (float const *__P) +{ + return _mm_load1_ps (__P); +} + +/* Load four SPFP values from P. The address must be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_load_ps (float const *__P) +{ + return (__m128) *(__v4sf *)__P; +} + +/* Load four SPFP values from P. The address need not be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadu_ps (float const *__P) +{ + return (__m128) __builtin_ia32_loadups (__P); +} + +/* Load four SPFP values in reverse order. The address must be aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_loadr_ps (float const *__P) +{ + __v4sf __tmp = *(__v4sf *)__P; + return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3)); +} + +/* Create the vector [Z Y X W]. 
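+   Note the argument order: the first argument becomes the highest
+   element; e.g. (an illustrative sketch, not part of the original
+   header):
+
+     __m128 v = _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f);
+     -- memory order { 0, 1, 2, 3 }; _mm_setr_ps takes the reverse order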
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W) +{ + return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; +} + +/* Create the vector [W X Y Z]. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_setr_ps (float __Z, float __Y, float __X, float __W) +{ + return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W }; +} + +/* Stores the lower SPFP value. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_ss (float *__P, __m128 __A) +{ + *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE float __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_cvtss_f32 (__m128 __A) +{ + return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); +} + +/* Store four SPFP values. The address must be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_ps (float *__P, __m128 __A) +{ + *(__v4sf *)__P = (__v4sf)__A; +} + +/* Store four SPFP values. The address need not be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storeu_ps (float *__P, __m128 __A) +{ + __builtin_ia32_storeups (__P, (__v4sf)__A); +} + +/* Store the lower SPFP value across four words. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store1_ps (float *__P, __m128 __A) +{ + __v4sf __va = (__v4sf)__A; + __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0)); + _mm_storeu_ps (__P, __tmp); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_store_ps1 (float *__P, __m128 __A) +{ + _mm_store1_ps (__P, __A); +} + +/* Store four SPFP values in reverse order. The address must be aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_storer_ps (float *__P, __m128 __A) +{ + __v4sf __va = (__v4sf)__A; + __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3)); + _mm_store_ps (__P, __tmp); +} + +/* Sets the low SPFP value of A from the low value of B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m128 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_move_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B); +} + +/* Extracts one of the four words of A. The selector N must be immediate. 
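+   E.g. (an illustrative sketch, not part of the original header):
+
+     int w = _mm_extract_pi16 (m, 2);   -- word 2 of the __m64 value m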
*/ +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_extract_pi16 (__m64 const __A, int const __N) +{ + return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pextrw (__m64 const __A, int const __N) +{ + return _mm_extract_pi16 (__A, __N); +} +#else +#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N)) +#define _m_pextrw(A, N) _mm_extract_pi16((A), (N)) +#endif + +/* Inserts word D into one of four words of A. The selector N must be + immediate. */ +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_insert_pi16 (__m64 const __A, int const __D, int const __N) +{ + return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pinsrw (__m64 const __A, int const __D, int const __N) +{ + return _mm_insert_pi16 (__A, __D, __N); +} +#else +#define _mm_insert_pi16(A, D, N) \ + ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N))) +#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N)) +#endif + +/* Compute the element-wise maximum of signed 16-bit values. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmaxsw (__m64 __A, __m64 __B) +{ + return _mm_max_pi16 (__A, __B); +} + +/* Compute the element-wise maximum of unsigned 8-bit values. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_max_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmaxub (__m64 __A, __m64 __B) +{ + return _mm_max_pu8 (__A, __B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pminsw (__m64 __A, __m64 __B) +{ + return _mm_min_pi16 (__A, __B); +} + +/* Compute the element-wise minimum of unsigned 8-bit values. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_min_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pminub (__m64 __A, __m64 __B) +{ + return _mm_min_pu8 (__A, __B); +} + +/* Create an 8-bit mask of the signs of 8-bit values. 
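+   E.g. (an illustrative sketch) testing whether any byte has its high
+   bit set:
+
+     if (_mm_movemask_pi8 (m) != 0)
+       handle_negative_bytes ();   -- hypothetical function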
*/ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_movemask_pi8 (__m64 __A) +{ + return __builtin_ia32_pmovmskb ((__v8qi)__A); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE int __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmovmskb (__m64 __A) +{ + return _mm_movemask_pi8 (__A); +} + +/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values + in B and produce the high 16 bits of the 32-bit results. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_mulhi_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pmulhuw (__m64 __A, __m64 __B) +{ + return _mm_mulhi_pu16 (__A, __B); +} + +/* Return a combination of the four 16-bit values in A. The selector + must be an immediate. */ +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_shuffle_pi16 (__m64 __A, int __N) +{ + return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pshufw (__m64 __A, int __N) +{ + return _mm_shuffle_pi16 (__A, __N); +} +#else +#define _mm_shuffle_pi16(A, N) \ + ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N))) +#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N)) +#endif + +/* Conditionally store byte elements of A into P. The high bit of each + byte in the selector N determines whether the corresponding byte from + A is stored. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) +{ + __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_maskmovq (__m64 __A, __m64 __N, char *__P) +{ + _mm_maskmove_si64 (__A, __N, __P); +} + +/* Compute the rounded averages of the unsigned 8-bit values in A and B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_avg_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pavgb (__m64 __A, __m64 __B) +{ + return _mm_avg_pu8 (__A, __B); +} + +/* Compute the rounded averages of the unsigned 16-bit values in A and B. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_avg_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_pavgw (__m64 __A, __m64 __B) +{ + return _mm_avg_pu16 (__A, __B); +} + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. 
Return the value in the lower 16-bit word; the + upper words are cleared. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sad_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B); +} + +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE __m64 __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_m_psadbw (__m64 __A, __m64 __B) +{ + return _mm_sad_pu8 (__A, __B); +} + +/* Loads one cache line from address P to a location "closer" to the + processor.  The selector I specifies the type of prefetch operation. */ +#if 0 +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_prefetch (void *__P, enum _mm_hint __I) +{ + __builtin_prefetch (__P, 0, __I); +} +#else +#define _mm_prefetch(P, I) \ + __builtin_prefetch ((P), 0, (I)) +#endif + +/* Stores the data in A to the address P without polluting the caches. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_stream_pi (__m64 *__P, __m64 __A) +{ + /* APPLE LOCAL 4656532 use V1DImode for _m64 */ + __builtin_ia32_movntq (__P, __A); +} + +/* Likewise.  The address must be 16-byte aligned. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_stream_ps (float *__P, __m128 __A) +{ + __builtin_ia32_movntps (__P, (__v4sf)__A); +} + +/* Guarantees that every preceding store is globally visible before + any subsequent store. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_sfence (void) +{ + __builtin_ia32_sfence (); +} + +/* The execution of the next instruction is delayed by an + implementation-specific amount of time.  The instruction does not + modify the architectural state. */ +/* APPLE LOCAL begin radar 5618945 */ +__STATIC_INLINE void __attribute__((__always_inline__)) +/* APPLE LOCAL end radar 5618945 */ +_mm_pause (void) +{ + __asm__ __volatile__ ("rep; nop" : : ); +} +/* APPLE LOCAL end radar 4152603 */ + +/* Transpose the 4x4 matrix composed of row[0-3]. */ +#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ +do { \ + __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ + __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \ + __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \ + __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \ + __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \ + (row0) = __builtin_ia32_movlhps (__t0, __t1); \ + (row1) = __builtin_ia32_movhlps (__t1, __t0); \ + (row2) = __builtin_ia32_movlhps (__t2, __t3); \ + (row3) = __builtin_ia32_movhlps (__t3, __t2); \ +} while (0) + +/* APPLE LOCAL begin nodebug inline 4152603 */ +#undef __always_inline__ +/* APPLE LOCAL end nodebug inline 4152603 */ + +/* For backward source compatibility. */ +#include <emmintrin.h> + +#endif /* __SSE__ */ +#endif /* _XMMINTRIN_H_INCLUDED */
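+
+/* Usage sketch for _MM_TRANSPOSE4_PS (illustrative only; not part of the
+   original header).  The macro rewrites its four row arguments in place:
+
+     __m128 r0 = _mm_setr_ps ( 0.f,  1.f,  2.f,  3.f);
+     __m128 r1 = _mm_setr_ps ( 4.f,  5.f,  6.f,  7.f);
+     __m128 r2 = _mm_setr_ps ( 8.f,  9.f, 10.f, 11.f);
+     __m128 r3 = _mm_setr_ps (12.f, 13.f, 14.f, 15.f);
+     _MM_TRANSPOSE4_PS (r0, r1, r2, r3);
+
+   after which r0 holds column 0, i.e. { 0, 4, 8, 12 }, and so on. */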