Diffstat (limited to 'gcc-4.2.1-5666.3/gcc/config/i386')
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h              106
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/athlon.md                874
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h                25
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/constraints.md           171
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver    81
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver    85
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/darwin.h                 452
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt                15
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h                43
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c            300
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h             1981
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h              77
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h                   7
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c        31
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def            97
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h            261
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386.c                 23515
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386.h                  3230
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386.md                21399
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386.opt                 262
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/k6.md                    268
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm             30
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h                220
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h              1219
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/mmx.md                  1470
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h               41
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/pentium.md               312
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h              172
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/ppro.md                  763
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/predicates.md           1037
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h              836
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/sse.md                  6218
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/sync.md                  291
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/t-darwin                  22
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64                12
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc               6
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h              304
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/x-darwin                   8
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64            8
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/x-i386                     3
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h             1582
41 files changed, 67834 insertions, 0 deletions
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h
new file mode 100644
index 000000000..8a466d914
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/ammintrin.h
@@ -0,0 +1,106 @@
+/* APPLE LOCAL file 5612787 mainline sse4 */
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the AMD Programmers
+ Manual Update, version 2.x */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4A__
+# error "SSE4A instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
+#include <pmmintrin.h>
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+__STATIC_INLINE void __attribute__((__always_inline__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+ __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+__STATIC_INLINE void __attribute__((__always_inline__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+ __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
+}
+#else
+#define _mm_extracti_si64(X, I, L) \
+ ((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+#endif
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+ return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
+}
+#else
+#define _mm_inserti_si64(X, Y, I, L) \
+ ((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+#endif
+
+#endif /* __SSE4A__ */
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
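The header above declares the SSE4A intrinsics (_mm_stream_sd, _mm_stream_ss and the extrq/insertq wrappers). Below is a minimal usage sketch that is not part of the patch; it assumes a translation unit built with SSE4A enabled (for example -msse4a), and the function and array names are illustrative only.

/* Hypothetical example: non-temporal scalar double stores through the
   SSE4A _mm_stream_sd intrinsic declared above (compiles to movntsd).
   _mm_load_sd comes from the SSE2 header pulled in via <pmmintrin.h>.  */
#include <ammintrin.h>

void
stream_copy_doubles (double *dst, const double *src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* Write the low double of the vector straight to memory,
       bypassing the cache.  */
    _mm_stream_sd (&dst[i], _mm_load_sd (&src[i]));
}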
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/athlon.md b/gcc-4.2.1-5666.3/gcc/config/i386/athlon.md
new file mode 100644
index 000000000..6d92b948b
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/athlon.md
@@ -0,0 +1,874 @@
+;; AMD Athlon Scheduling
+;;
+;; The Athlon contains three pipelined FP units, three integer units and
+;; three address generation units.
+;;
+;; The predecode logic determines instruction boundaries within the 64-byte
+;; cache line, so the cache-line-straddling problem of the K6 might be an
+;; issue here as well, but it is not noted in the documentation.
+;;
+;; Three DirectPath instruction decoders and only one VectorPath decoder
+;; are available.  They can decode three DirectPath instructions or one
+;; VectorPath instruction per cycle.
+;; Decoded macro instructions are then passed to the 72-entry instruction
+;; control unit, which passes
+;; them to the specialized integer (18-entry) and FP (36-entry) schedulers.
+;;
+;; The load/store queue unit is not attached to the schedulers but
+;; communicates with all the execution units separately instead.
+
+(define_attr "athlon_decode" "direct,vector,double"
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
+ (const_string "vector")
+ (and (eq_attr "type" "push")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF")))
+ (const_string "vector")]
+ (const_string "direct")))
+
+;;
+;; decode0 decode1 decode2
+;; \ | /
+;; instruction control unit (72 entry scheduler)
+;; | |
+;; integer scheduler (18) stack map
+;; / | | | | \ stack rename
+;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler
+;; | agu0 | agu1 agu2 register file
+;; | \ | | / | | |
+;; \ /\ | / fadd fmul fstore
+;; \ / \ | / fadd fmul fstore
+;; imul load/store (2x) fadd fmul fstore
+
+(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_cpu_unit "athlon-decodev" "athlon")
+;; Model the fact that a double-decoded instruction may take 2 cycles
+;; to decode when decoder2 and decoder0 in the next cycle
+;; are used (this is needed to allow a throughput of 1.5 double-decoded
+;; instructions per cycle).
+;;
+;; In order to avoid a dependence between the reservation of the decoder
+;; and other units, we model the decoder as a two-stage fully pipelined unit
+;; where only a double-decoded instruction may occupy the unit in the first cycle.
+;; With this scheme, however, two double instructions could be issued in cycle 0.
+;;
+;; Avoid this by using a presence set requiring decoder0 to be allocated
+;; too.  Vector-decoded instructions then can't be issued when
+;; modeled as consuming decoder0+decoder1+decoder2.
+;; We solve that with a specialized vector decoder unit and an exclusion set.
+(presence_set "athlon-decode2" "athlon-decode0")
+(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
+(define_reservation "athlon-vector" "nothing,athlon-decodev")
+(define_reservation "athlon-direct0" "nothing,athlon-decode0")
+(define_reservation "athlon-direct" "nothing,
+ (athlon-decode0 | athlon-decode1
+ | athlon-decode2)")
+;; Double instructions behave like two direct instructions.
+(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
+ | (nothing,(athlon-decode0 + athlon-decode1))
+ | (nothing,(athlon-decode1 + athlon-decode2)))")
+
+;; Modeling the AGU and IEU units results in extremely large automata, and
+;; in our approximation they are hardly ever filled.  Only the IEU
+;; unit can be, as the issue rate is 3 and the AGU unit is always used
+;; first in the insn reservations.  Skip the models.
+
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+(define_cpu_unit "athlon-ieu0" "athlon")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing")
+
+(define_cpu_unit "athlon-mult" "athlon_mult")
+
+(define_cpu_unit "athlon-load0" "athlon_load")
+(define_cpu_unit "athlon-load1" "athlon_load")
+(define_reservation "athlon-load" "athlon-agu,
+ (athlon-load0 | athlon-load1),nothing")
+;; 128bit SSE instructions issue two loads at once
+(define_reservation "athlon-load2" "athlon-agu,
+ (athlon-load0 + athlon-load1),nothing")
+
+(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
+;; 128bit SSE instructions issue two stores at once
+(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
+
+
+;; The FP operations start to execute at stage 12 in the pipeline, while
+;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
+;; Compensate the difference for Athlon because it results in significantly
+;; smaller automata.
+(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
+;; The floating point loads.
+(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
+(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
+
+
+;; The three fp units are fully pipelined with latency of 3
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
+
+;; Vector operations usually consume many of the pipes.
+(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
+
+
+;; Jump instructions are executed in the branch unit, completely transparently to us.
+(define_insn_reservation "athlon_branch" 0
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "ibr"))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_call" 0
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "call,callv"))
+ "athlon-vector,athlon-ieu")
+
+;; Latency of push operation is 3 cycles, but ESP value is available
+;; earlier
+(define_insn_reservation "athlon_push" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "push"))
+ "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "pop"))
+ "athlon-vector,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_pop_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "pop"))
+ "athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "leave"))
+ "athlon-vector,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "leave"))
+ "athlon-double,(athlon-ieu+athlon-load)")
+
+;; Lea executes in AGU unit with 2 cycles latency.
+(define_insn_reservation "athlon_lea" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
+
+;; Mul executes in special multiplier unit attached to IEU0
+(define_insn_reservation "athlon_imul" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
+;; ??? Widening multiply is vector or double.
+(define_insn_reservation "athlon_imul_k8_DI" 4
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_mem" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8_DI" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load,both"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
+
+;; Idiv cannot execute in parallel with other instructions.  Treating it
+;; as a short-latency vector instruction is a good approximation that keeps
+;; the scheduler from trying too hard to hide its latency by overlapping it
+;; with other instructions.
+;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
+;; of the other code.
+
+(define_insn_reservation "athlon_idiv" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
+(define_insn_reservation "athlon_idiv_mem" 9
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
+;; The parallelism of string instructions is not documented.  Model it the same way
+;; as idiv to create smaller automata.  This probably does not matter much.
+(define_insn_reservation "athlon_str" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both,store")))
+ "athlon-vector,athlon-load,athlon-ieu0*6")
+
+(define_insn_reservation "athlon_idirect" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_loadmov" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load")
+(define_insn_reservation "athlon_idirect_load" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_movstore" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_idirect_both" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_both" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_idirect_store" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+(define_insn_reservation "athlon_ivector_store" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
+
+;; Athlon floating point unit
+(define_insn_reservation "athlon_fldxf" 12
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fpload2,athlon-fvector*9")
+(define_insn_reservation "athlon_fldxf_k8" 13
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
+;; Assume superforwarding to take place so effective latency of fany op is 0.
+(define_insn_reservation "athlon_fld" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_fld_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+
+(define_insn_reservation "athlon_fstxf" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
+(define_insn_reservation "athlon_fstxf_k8" 8
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
+(define_insn_reservation "athlon_fst" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fst_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fist" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fistp,fisttp"))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fmov" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fmov"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_fadd_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fadd_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fadd" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fop"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fmul_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fmul_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fmul" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fmul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fsgn" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fsgn"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load" 24
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load_k8" 13
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fdiv" 24
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_k8" 11
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fpspc_load" 103
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fpspc" 100
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fpspc"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fcmov" 7
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load_k8" 17
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_k8" 15
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+;; fcomi is vector decoded but uses only one pipe.
+(define_insn_reservation "athlon_fcomi_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcomi_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcomi" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "type" "fcmp")))
+ "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load_k8" 4
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcom" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "fcmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+;; Never seen by the scheduler because we still don't do post reg-stack
+;; scheduling.
+;(define_insn_reservation "athlon_fxch" 2
+; (and (eq_attr "cpu" "athlon,k8,generic64")
+; (eq_attr "type" "fxch"))
+; "athlon-direct,athlon-fpsched,athlon-fany")
+
+;; Athlon handles MMX operations in the FPU unit with shorter latencies
+
+(define_insn_reservation "athlon_movlpd_load" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_movlpd_load_k8" 2
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_movsd_load_generic64" 2
+ (and (eq_attr "cpu" "generic64")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
+(define_insn_reservation "athlon_movaps_load_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_movaps_load" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
+(define_insn_reservation "athlon_movss_load" 1
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload,(athlon-fany*2)")
+(define_insn_reservation "athlon_movss_load_k8" 1
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
+(define_insn_reservation "athlon_mmxsseld" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_mmxsseld_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_mmxssest" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_short" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_movaps_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF,TI")))
+ "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
+(define_insn_reservation "athlon_movaps" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF,TI")))
+ "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
+(define_insn_reservation "athlon_mmxssemov" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "mmxmov,ssemov"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "mmxmul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "unit" "mmx")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "unit" "mmx"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well.  The latency
+;; is the same as for i387 operations for scalar operations
+
+(define_insn_reservation "athlon_sselog_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-vector,athlon-fpsched,athlon-fmul*2")
+(define_insn_reservation "athlon_sselog_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-double,athlon-fpsched,athlon-fmul")
+;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
+(define_insn_reservation "athlon_ssecmp_load" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp_load_k8" 4
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF,DI,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "mode" "SF,DF,DI,TI")))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssecmp"))
+ "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecomi_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "mode" "SF,DF,DI")))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_k8" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sseadd"))
+ "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "sseadd"))
+ "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+
+;; Conversions behave very irregularly and the scheduling is critical here.
+;; Take each instruction separately. Assume that the mode is always set to the
+;; destination one and athlon_decode is set to the K8 versions.
+
+;; cvtss2sd
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (eq_attr "mode" "DF"))))
+ "athlon-direct,athlon-fpsched,athlon-fstore")
+;; cvtps2pd.  Model it the same way as the other double-decoded FP conversions.
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "V2DF,V4SF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (eq_attr "mode" "V2DF,V4SF,TI"))))
+ "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath);
+;; cvtsi2sd has throughput 1 and is executed in the store unit with a latency of 6.
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+;; cvtsi2ss mem, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-vector,athlon-fpload,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fploadk8,athlon-fstore")
+;; cvtsi2ss reg, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
+;; ??? Why is it faster than cvtsd2ss?
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-fvector*2")
+;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
+(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load")))))
+ "athlon-vector,athlon-fploadk8,athlon-fvector")
+;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,athlon-fstore")
+
+
+(define_insn_reservation "athlon_ssemul_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_ssemul_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_k8" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssemul"))
+ "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssemul"))
+ "athlon-double,athlon-fpsched,(athlon-fmul*2)")
+;; divsd timings. divss is faster
+(define_insn_reservation "athlon_ssediv_load" 20
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv_load_k8" 22
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv" 20
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fpsched,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector_load" 39
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_k8" 35
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector" 39
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssediv"))
+ "athlon-vector,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_k8" 39
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssediv"))
+ "athlon-double,athlon-fmul*34")
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h b/gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h
new file mode 100644
index 000000000..46a55b0d1
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/biarch64.h
@@ -0,0 +1,25 @@
+/* Make the configure machinery produce a biarch compiler defaulting to 64-bit mode.
+   This file must be included first, and the OS-specific file later,
+   so that the latter can override otherwise wrong defaults.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Contributed by Bo Thorsen <bo@suse.de>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#define TARGET_64BIT_DEFAULT MASK_64BIT
+#define TARGET_BI_ARCH 1
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/constraints.md b/gcc-4.2.1-5666.3/gcc/config/i386/constraints.md
new file mode 100644
index 000000000..5d76ac523
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/constraints.md
@@ -0,0 +1,171 @@
+;; Constraint definitions for IA-32 and x86-64.
+;; Copyright (C) 2006 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;; Unused letters:
+;;; B H TU W
+;;; h jk vw z
+
+;; Integer register constraints.
+;; It is not necessary to define 'r' here.
+(define_register_constraint "R" "LEGACY_REGS"
+ "Legacy register---the eight integer registers available on all
+ i386 processors (@code{a}, @code{b}, @code{c}, @code{d},
+ @code{si}, @code{di}, @code{bp}, @code{sp}).")
+
+(define_register_constraint "q" "TARGET_64BIT ? GENERAL_REGS : Q_REGS"
+ "Any register accessible as @code{@var{r}l}. In 32-bit mode, @code{a},
+ @code{b}, @code{c}, and @code{d}; in 64-bit mode, any integer register.")
+
+(define_register_constraint "Q" "Q_REGS"
+ "Any register accessible as @code{@var{r}h}: @code{a}, @code{b},
+ @code{c}, and @code{d}.")
+
+(define_register_constraint "l" "INDEX_REGS"
+ "@internal Any register that can be used as the index in a base+index
+ memory access: that is, any general register except the stack pointer.")
+
+(define_register_constraint "a" "AREG"
+ "The @code{a} register.")
+
+(define_register_constraint "b" "BREG"
+ "The @code{b} register.")
+
+(define_register_constraint "c" "CREG"
+ "The @code{c} register.")
+
+(define_register_constraint "d" "DREG"
+ "The @code{d} register.")
+
+(define_register_constraint "S" "SIREG"
+ "The @code{si} register.")
+
+(define_register_constraint "D" "DIREG"
+ "The @code{di} register.")
+
+(define_register_constraint "A" "AD_REGS"
+ "The @code{a} and @code{d} registers, as a pair (for instructions
+ that return half the result in one and half in the other).")
+
+;; Floating-point register constraints.
+(define_register_constraint "f"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FLOAT_REGS : NO_REGS"
+ "Any 80387 floating-point (stack) register.")
+
+(define_register_constraint "t"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_TOP_REG : NO_REGS"
+ "Top of 80387 floating-point stack (@code{%st(0)}).")
+
+(define_register_constraint "u"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
+ "Second from top of 80387 floating-point stack (@code{%st(1)}).")
+
+;; Vector registers (also used for plain floating point nowadays).
+(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS"
+ "Any MMX register.")
+
+(define_register_constraint "x" "TARGET_SSE ? SSE_REGS : NO_REGS"
+ "Any SSE register.")
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; We use the Y prefix to denote any number of conditional register sets:
+;; 0 First SSE register.
+;; t SSE2 enabled
+;; i SSE2 inter-unit moves enabled
+;; m MMX inter-unit moves enabled
+
+(define_register_constraint "Y0" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
+ "First SSE register (@code{%xmm0}).")
+
+(define_register_constraint "Yt" "TARGET_SSE2 ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 is enabled.")
+
+(define_register_constraint "Yi"
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.")
+
+(define_register_constraint "Ym"
+ "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
+ "@internal Any MMX register, when inter-unit moves are enabled.")
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Integer constant constraints.
+(define_constraint "I"
+ "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 31")))
+
+(define_constraint "J"
+ "Integer constant in the range 0 @dots{} 63, for 64-bit shifts."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 63")))
+
+(define_constraint "K"
+ "Signed 8-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "ival >= -128 && ival <= 127")))
+
+(define_constraint "L"
+ "@code{0xFF} or @code{0xFFFF}, for andsi as a zero-extending move."
+ (and (match_code "const_int")
+ (match_test "ival == 0xFF || ival == 0xFFFF")))
+
+(define_constraint "M"
+ "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 3")))
+
+(define_constraint "N"
+ "Unsigned 8-bit integer constant (for @code{in} and @code{out}
+ instructions)."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 255")))
+
+(define_constraint "O"
+ "@internal Integer constant in the range 0 @dots{} 127, for 128-bit shifts."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 127")))
+
+;; Floating-point constant constraints.
+;; We allow constants even if TARGET_80387 isn't set, because the
+;; stack register converter may need to load 0.0 into the function
+;; value register (top of stack).
+(define_constraint "G"
+ "Standard 80387 floating point constant."
+ (and (match_code "const_double")
+ (match_test "standard_80387_constant_p (op)")))
+
+;; This can theoretically be any mode's CONST0_RTX.
+(define_constraint "C"
+ "Standard SSE floating point constant."
+ (match_test "standard_sse_constant_p (op)"))
+
+;; Constant-or-symbol-reference constraints.
+
+(define_constraint "e"
+ "32-bit signed integer constant, or a symbolic reference known
+ to fit that range (for immediate operands in sign-extending x86-64
+ instructions)."
+ (match_operand 0 "x86_64_immediate_operand"))
+
+(define_constraint "Z"
+ "32-bit unsigned integer constant, or a symbolic reference known
+ to fit that range (for immediate operands in zero-extending x86-64
+ instructions)."
+ (match_operand 0 "x86_64_zext_immediate_operand"))
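The constraint letters defined above are what appear in operand constraints, both in the machine description and in GNU C extended asm. Below is a minimal inline-asm sketch that is not part of the patch; it assumes GNU C on x86, and the function and variable names are illustrative only.

/* Hypothetical example: "=r" allows any general register, "0" ties the
   input to operand 0's register, and "c" forces the shift count into
   %ecx so the rotate can read it as %cl.  A constant count could instead
   use the "I" constraint (an immediate in the range 0..31).  */
static unsigned int
rotate_left (unsigned int value, unsigned int count)
{
  unsigned int result;
  __asm__ ("roll %%cl, %0"
           : "=r" (result)
           : "0" (value), "c" (count));
  return result;
}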
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver
new file mode 100644
index 000000000..aaeb934fe
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.4.ver
@@ -0,0 +1,81 @@
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divxc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunsxfdi
+___fixunsxfsi
+___fixxfdi
+___floatdidf
+___floatdisf
+___floatdixf
+___gcc_personality_v0
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___mulvdi3
+___mulvsi3
+___mulxc3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powixf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver
new file mode 100644
index 000000000..02a085843
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin-libgcc.10.5.ver
@@ -0,0 +1,85 @@
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetIPInfo
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divxc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunsxfdi
+___fixunsxfsi
+___fixxfdi
+___floatdidf
+___floatdisf
+___floatdixf
+___floatundidf
+___floatundisf
+___floatundixf
+___gcc_personality_v0
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___mulvdi3
+___mulvsi3
+___mulxc3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powixf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin.h b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.h
new file mode 100644
index 000000000..d0b1db12b
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.h
@@ -0,0 +1,452 @@
+/* Target definitions for x86 running Darwin.
+ Copyright (C) 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* Enable Mach-O bits in generic x86 code. */
+#undef TARGET_MACHO
+#define TARGET_MACHO 1
+
+/* APPLE LOCAL begin mainline */
+#undef TARGET_64BIT
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+#define TARGET_64BIT (target_flags & MASK_64BIT)
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+#define TARGET_VERSION fprintf (stderr, " (i686 Darwin)");
+/* APPLE LOCAL end mainline */
+
+#undef TARGET_64BIT
+#define TARGET_64BIT (target_flags & MASK_64BIT)
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+/* Size of the Obj-C jump buffer. */
+#define OBJC_JBLEN ((TARGET_64BIT) ? ((9 * 2) + 3 + 16) : (18))
+
+#undef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT (TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+
+/* APPLE LOCAL begin mainline */
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef MAX_BITS_PER_WORD
+#define MAX_BITS_PER_WORD 64
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ darwin_cpp_builtins (pfile); \
+ } \
+ while (0)
+/* APPLE LOCAL end mainline */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef MAX_BITS_PER_WORD
+#define MAX_BITS_PER_WORD 64
+
+#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
+
+/* We want -fPIC by default, unless we're using -static to compile for
+ the kernel or some such. */
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{!mkernel:%{!static:%{!mdynamic-no-pic:-fPIC}}} \
+ "/* APPLE LOCAL ARM ignore -mthumb and -mno-thumb */"\
+ %<mthumb %<mno-thumb \
+ "/* APPLE LOCAL ARM 5683689 */"\
+ %{!mmacosx-version-min=*: %{!miphoneos-version-min=*: %(darwin_cc1_minversion)}} \
+ "/* APPLE LOCAL ignore -mcpu=G4 -mcpu=G5 */"\
+ %<faltivec %<mno-fused-madd %<mlong-branch %<mlongcall %<mcpu=G4 %<mcpu=G5 \
+ %{g: %{!fno-eliminate-unused-debug-symbols: -feliminate-unused-debug-symbols }}"
+
+/* APPLE LOCAL AltiVec */
+#define CPP_ALTIVEC_SPEC "%<faltivec"
+
+/* APPLE LOCAL begin mainline */
+#undef ASM_SPEC
+/* APPLE LOCAL begin kext weak_import 5935650 */
+#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
+ %{mkernel|static|fapple-kext:%{!m64:-static}}"
+/* APPLE LOCAL end kext weak_import 5935650 */
+
+#define DARWIN_ARCH_SPEC "%{m64:x86_64;:i386}"
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+/* APPLE LOCAL begin mainline 2007-03-13 5005743 5040758 */ \
+/* Determine a minimum version based on compiler options. */
+#define DARWIN_MINVERSION_SPEC \
+ "%{!m64|fgnu-runtime:10.4; \
+ ,objective-c|,objc-cpp-output:10.5; \
+ ,objective-c-header:10.5; \
+ ,objective-c++|,objective-c++-cpp-output:10.5; \
+ ,objective-c++-header|,objc++-cpp-output:10.5; \
+ :10.4}"
+
+/* APPLE LOCAL end mainline 2007-03-13 5005743 5040758 */ \
+/* APPLE LOCAL begin ARM 5683689 */
+/* Default cc1 option for specifying minimum version number. */
+#define DARWIN_CC1_MINVERSION_SPEC "-mmacosx-version-min=%(darwin_minversion)"
+
+/* Default ld option for specifying minimum version number. */
+#define DARWIN_LD_MINVERSION_SPEC "-macosx_version_min %(darwin_minversion)"
+
+/* Use macosx version numbers by default. */
+#define DARWIN_DEFAULT_VERSION_TYPE DARWIN_VERSION_MACOSX
+/* APPLE LOCAL end ARM 5683689 */
+
+/* APPLE LOCAL ARM 5681645 8307333 */
+#define DARWIN_IPHONEOS_LIBGCC_SPEC "-lgcc"
+
+/* APPLE LOCAL begin link optimizations 6499452 */
+#undef DARWIN_CRT1_SPEC
+#define DARWIN_CRT1_SPEC \
+ "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \
+ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lcrt1.10.5.o) \
+ %:version-compare(>= 10.6 mmacosx-version-min= -lcrt1.10.6.o)"
+
+#undef DARWIN_DYLIB1_SPEC
+#define DARWIN_DYLIB1_SPEC \
+ "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \
+ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -ldylib1.10.5.o)"
+
+#undef DARWIN_BUNDLE1_SPEC
+#define DARWIN_BUNDLE1_SPEC \
+ "%:version-compare(!> 10.6 mmacosx-version-min= -lbundle1.o)"
+/* APPLE LOCAL end link optimizations 6499452 */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", "" }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
+/* APPLE LOCAL end mainline */
+
+/* APPLE LOCAL begin prefer -lSystem 6645902 */
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{miphoneos-version-min=*: %G %L} \
+ %{!miphoneos-version-min=*: \
+ %{!static:%:version-compare(>= 10.6 mmacosx-version-min= -lSystem)} %G %L}"
+/* APPLE LOCAL end prefer -lSystem 6645902 */
+
+/* Use the following macro for any Darwin/x86-specific command-line option
+ translation. */
+#define SUBTARGET_OPTION_TRANSLATE_TABLE \
+ { "", "" }
+
+/* The Darwin assembler mostly follows AT&T syntax. */
+#undef ASSEMBLER_DIALECT
+#define ASSEMBLER_DIALECT ASM_ATT
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't. This macro controls what to do: by default, don't
+ print %cl. */
+
+#define SHIFT_DOUBLE_OMITS_COUNT 0
+
+extern void darwin_x86_file_end (void);
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END darwin_x86_file_end
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* String containing the assembler's comment-starter. */
+
+#define ASM_COMMENT_START "#"
+
+/* By default, target has a 80387, uses IEEE compatible arithmetic,
+ and returns float values in the 387. */
+
+#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE)
+/* APPLE LOCAL begin mainline */
+/* For darwin we want to target specific processor features as a minimum,
+ but these unfortunately don't correspond to a specific processor. */
+#undef TARGET_SUBTARGET32_DEFAULT
+#define TARGET_SUBTARGET32_DEFAULT (MASK_MMX \
+ | MASK_SSE \
+ | MASK_SSE2)
+
+#undef TARGET_SUBTARGET64_DEFAULT
+#define TARGET_SUBTARGET64_DEFAULT (MASK_MMX \
+ | MASK_SSE \
+ | MASK_SSE2 \
+ | MASK_SSE3)
+/* APPLE LOCAL end mainline */
+/* APPLE LOCAL mdynamic-no-pic */
+/* Remove disabling of mdynamic-no-pic */
+
+#undef GOT_SYMBOL_NAME
+#define GOT_SYMBOL_NAME (machopic_function_base_name ())
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+#define LPREFIX "L"
+
+/* These are used by -fbranch-probabilities */
+#define HOT_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions"
+#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME \
+ "__TEXT,__unlikely,regular,pure_instructions"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_BYTE_OP "\t.byte\t"
+#define ASM_SHORT "\t.word\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t"
+
+#define SUBTARGET_ENCODE_SECTION_INFO darwin_encode_section_info
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { if ((LOG) != 0) \
+ { \
+ if (in_section == text_section) \
+ fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \
+ else \
+ fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \
+ } \
+ } while (0)
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ /* APPLE LOCAL begin mainline */ \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (ROUNDED)))
+ /* APPLE LOCAL end mainline */
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (ROUNDED)))
+
+
+/* APPLE LOCAL begin Macintosh alignment 2002-2-19 --ff */
+#if 0
+#define MASK_ALIGN_NATURAL 0x40000000
+#define TARGET_ALIGN_NATURAL (target_flags & MASK_ALIGN_NATURAL)
+#define MASK_ALIGN_MAC68K 0x20000000
+#define TARGET_ALIGN_MAC68K (target_flags & MASK_ALIGN_MAC68K)
+#endif
+#define rs6000_alignment_flags target_flags
+
+#define ROUND_TYPE_ALIGN(TYPE, COMPUTED, SPECIFIED) \
+ (((TREE_CODE (TYPE) == RECORD_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == QUAL_UNION_TYPE) \
+ && OPTION_ALIGN_MAC68K \
+ && MAX (COMPUTED, SPECIFIED) == 8) ? 16 \
+ : MAX (COMPUTED, SPECIFIED))
+/* APPLE LOCAL end Macintosh alignment 2002-2-19 --ff */
+
+/* Darwin profiling -- call mcount. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ /* APPLE LOCAL axe stubs 5571540 */ \
+ if (darwin_stubs && MACHOPIC_INDIRECT && !TARGET_64BIT) \
+ { \
+ const char *name = machopic_mcount_stub_name (); \
+ fprintf (FILE, "\tcall %s\n", name+1); /* skip '&' */ \
+ machopic_validate_stub_or_non_lazy_ptr (name); \
+ } \
+ else fprintf (FILE, "\tcall mcount\n"); \
+ } while (0)
+
+/* APPLE LOCAL CW asm blocks */
+extern int flag_iasm_blocks;
+/* APPLE LOCAL begin fix-and-continue x86 */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do { \
+ /* APPLE LOCAL begin ARM 5683689 */ \
+ if (!darwin_macosx_version_min \
+ && !darwin_iphoneos_version_min) \
+ darwin_macosx_version_min = "10.1"; \
+ /* APPLE LOCAL end ARM 5683689 */ \
+ /* APPLE LOCAL begin CW asm blocks */ \
+ if (flag_iasm_blocks) \
+ flag_ms_asms = 1; \
+ /* APPLE LOCAL end CW asm blocks */ \
+ /* APPLE LOCAL begin constant cfstrings */ \
+ if (darwin_constant_cfstrings < 0) \
+ darwin_constant_cfstrings = 1; \
+ /* APPLE LOCAL end constant cfstrings */ \
+ if (TARGET_64BIT) \
+ { \
+ if (MACHO_DYNAMIC_NO_PIC_P) \
+ target_flags &= ~MASK_MACHO_DYNAMIC_NO_PIC; \
+ } \
+ /* APPLE LOCAL begin fix this for mainline */ \
+ /* For mainline this needs to be fixed to have every \
+ cpu architecture feature as an isa mask. Every \
+ cpu we've shipped supports all of these features. \
+ This includes all ix86_arch cpu features currently \
+ defined except x86_cmove which is turned on for \
+ TARGET_SSE anyhow. */ \
+ if (!ix86_arch_string) \
+ { \
+ x86_cmpxchg = ~(0); \
+ x86_cmpxchg8b = ~(0); \
+ x86_cmpxchg16b = ~(0); \
+ x86_xadd = ~(0); \
+ x86_bswap = ~(0); \
+ } \
+ /* APPLE LOCAL end fix this for mainline */ \
+ } while (0)
+
+/* True, iff we're generating fast turn around debugging code. When
+ true, we arrange for function prologues to start with 6 nops so
+ that gdb may insert code to redirect them, and for data to be
+ accessed indirectly. The runtime uses this indirection to forward
+ references for data to the original instance of that data. */
+
+#define TARGET_FIX_AND_CONTINUE (darwin_fix_and_continue)
+/* APPLE LOCAL end fix-and-continue x86 */
+
+#define C_COMMON_OVERRIDE_OPTIONS \
+ do { \
+ SUBTARGET_C_COMMON_OVERRIDE_OPTIONS; \
+ } while (0)
+
+/* APPLE LOCAL begin mainline 4.3 2006-10-31 4370143 */
+/* Removed PREFERRED_DEBUGGING_TYPE */
+/* APPLE LOCAL end mainline 4.3 2006-10-31 4370143 */
+
+/* Darwin uses the standard DWARF register numbers but the default
+ register numbers for STABS. Fortunately for 64-bit code the
+ default and the standard are the same. */
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : write_symbols == DWARF2_DEBUG ? svr4_dbx_register_map[n] \
+ : dbx_register_map[n])
+
+/* Unfortunately, the 32-bit EH information also doesn't use the standard
+ DWARF register numbers. */
+#define DWARF2_FRAME_REG_OUT(n, for_eh) \
+ (! (for_eh) || write_symbols != DWARF2_DEBUG || TARGET_64BIT ? (n) \
+ : (n) == 5 ? 4 \
+ : (n) == 4 ? 5 \
+ : (n) >= 11 && (n) <= 18 ? (n) + 1 \
+ : (n))
+
+/* APPLE LOCAL begin 4457939 stack alignment mishandled */
+/* <rdar://problem/4471596> stack alignment is not handled properly
+
+   Please remove this entire apple local when addressing this
+   Radar. */
+extern void ix86_darwin_init_expanders (void);
+#define INIT_EXPANDERS (ix86_darwin_init_expanders ())
+/* APPLE LOCAL end 4457939 stack alignment mishandled */
+
+
+/* APPLE LOCAL begin CW asm blocks */
+#define IASM_VALID_PIC(DECL, E) \
+ do { \
+ if (! TARGET_64BIT \
+ && E->as_immediate && ! MACHO_DYNAMIC_NO_PIC_P && flag_pic) \
+      warning (0, "non-pic addressing form not suitable for pic code"); \
+ } while (0)
+#define IASM_RIP(X) do { if (TARGET_64BIT) strcat (X, "(%%rip)"); } while (0)
+/* APPLE LOCAL end cw asm blocks */
+
+/* APPLE LOCAL KEXT */
+#define TARGET_SUPPORTS_KEXTABI1 (! TARGET_64BIT)
+
+
+#undef REGISTER_TARGET_PRAGMAS
+#define REGISTER_TARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS()
+
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes
+
+/* APPLE LOCAL begin mainline */
+/* For 64-bit, we need to add 4 because @GOTPCREL is relative to the
+ end of the instruction, but without the 4 we'd only have the right
+ address for the start of the instruction. */
+#undef ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ if (TARGET_64BIT) \
+ { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_pcrel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs ("+4@GOTPCREL", FILE); \
+ goto DONE; \
+ } \
+ } \
+ else \
+ { \
+ if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) \
+ { \
+ darwin_non_lazy_pcrel (FILE, ADDR); \
+ goto DONE; \
+ } \
+ }
+/* APPLE LOCAL end mainline */
+/* APPLE LOCAL begin track initialization status 4964532 */
+/* APPLE LOCAL begin ARM 5683689 */
+#undef TARGET_DWARF_UNINIT_VARS
+#define TARGET_DWARF_UNINIT_VARS \
+ (darwin_iphoneos_version_min || \
+ strverscmp (darwin_macosx_version_min, "10.4") >= 0)
+/* APPLE LOCAL end ARM 5683689 */
+/* APPLE LOCAL end track initialization status 4964532 */
+
+/* This needs to move since i386 uses the first flag and other flags are
+ used in Mach-O. */
+#undef MACHO_SYMBOL_FLAG_VARIABLE
+#define MACHO_SYMBOL_FLAG_VARIABLE ((SYMBOL_FLAG_MACH_DEP) << 3)
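The DWARF2_FRAME_REG_OUT macro above only remaps DWARF column numbers for
32-bit EH data emitted when DWARF 2 debugging is selected.  A minimal
stand-alone sketch of the same mapping; the function name and plain int flags
are illustrative stand-ins for GCC's internal globals, not part of the port:

static int
darwin_eh_dwarf_regno_sketch (int n, int for_eh, int using_dwarf2,
                              int target_64bit)
{
  /* 64-bit code and non-EH consumers keep the standard numbering.  */
  if (!for_eh || !using_dwarf2 || target_64bit)
    return n;
  if (n == 5)                   /* Columns 4 and 5 are swapped ...  */
    return 4;
  if (n == 4)                   /* ... in the 32-bit Darwin EH encoding.  */
    return 5;
  if (n >= 11 && n <= 18)       /* Columns 11..18 are shifted up by one.  */
    return n + 1;
  return n;
}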
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt
new file mode 100644
index 000000000..90854e1ca
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin.opt
@@ -0,0 +1,15 @@
+; APPLE LOCAL begin Macintosh alignment 2002-2-19 --ff
+malign-mac68k
+Target Report Mask(ALIGN_MAC68K) Var(darwin_alignment_flags)
+Align structs and unions according to mac68k rules
+
+malign-natural
+Target Report Mask(ALIGN_NATURAL) Var(darwin_alignment_flags)
+Align structs and unions according to natural rules
+
+; Maybe we don't need this.
+;malign-power
+;; I want this to clear MASK_ALIGN_MAC68K | MASK_ALIGN_NATURAL
+;Target Undocumented InverseMask(ALIGN_MAC68K)
+;Align structs and unions according to PowerPC rules
+; APPLE LOCAL end Macintosh alignment 2002-2-19 --ff
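The -malign-mac68k flag declared here sets the mask that darwin.h's
ROUND_TYPE_ALIGN rule checks (as OPTION_ALIGN_MAC68K), rounding an 8-bit
record or union alignment up to 16 bits.  A hedged illustration of the
intended effect, deduced from that macro rather than verified against a build:

/* Three chars give the record a computed alignment of 8 bits (1 byte).  */
struct three_chars { char a, b, c; };

/* Default rules:   __alignof__ (struct three_chars) == 1, sizeof == 3.   */
/* -malign-mac68k:  the 8-bit alignment is rounded up to 16 bits, so the  */
/*                  struct becomes 2-byte aligned and pads out to 4.      */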
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h b/gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h
new file mode 100644
index 000000000..e630a7064
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/darwin64.h
@@ -0,0 +1,43 @@
+/* Target definitions for x86_64 running Darwin.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (x86_64 Darwin)");
+
+#undef DARWIN_ARCH_SPEC
+#define DARWIN_ARCH_SPEC "%{m32:i386;:x86_64}"
+
+#undef DARWIN_SUBARCH_SPEC
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+/* APPLE LOCAL begin kext 6400713 */
+#undef ASM_SPEC
+#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
+ %{mkernel|static|fapple-kext:%{m32:-static}}"
+/* APPLE LOCAL end kext 6400713 */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ /* APPLE LOCAL 6015949 */ \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", "" }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c b/gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c
new file mode 100644
index 000000000..ffcee4e55
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/driver-i386.c
@@ -0,0 +1,300 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include <stdlib.h>
+
+const char *host_detect_local_cpu (int argc, const char **argv);
+
+#ifdef GCC_VERSION
+#define cpuid(num,a,b,c,d) \
+ asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (num))
+
+#define bit_CMPXCHG8B (1 << 8)
+#define bit_CMOV (1 << 15)
+#define bit_MMX (1 << 23)
+#define bit_SSE (1 << 25)
+#define bit_SSE2 (1 << 26)
+
+#define bit_SSE3 (1 << 0)
+#define bit_CMPXCHG16B (1 << 13)
+
+#define bit_3DNOW (1 << 31)
+#define bit_3DNOWP (1 << 30)
+#define bit_LM (1 << 29)
+
+/* This will be called by the spec parser in gcc.c when it sees
+ a %:local_cpu_detect(args) construct. Currently it will be called
+ with either "arch" or "tune" as argument depending on if -march=native
+ or -mtune=native is to be substituted.
+
+ It returns a string containing new command line parameters to be
+ put at the place of the above two options, depending on what CPU
+ this is executed. E.g. "-march=k8" on an AMD64 machine
+ for -march=native.
+
+ ARGC and ARGV are set depending on the actual arguments given
+ in the spec. */
+const char *host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *cpu = NULL;
+ enum processor_type processor = PROCESSOR_I386;
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int max_level;
+ unsigned int vendor;
+ unsigned int ext_level;
+ unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0;
+ unsigned char has_sse2 = 0, has_sse3 = 0, has_cmov = 0;
+ unsigned char has_longmode = 0, has_cmpxchg8b = 0;
+ unsigned char is_amd = 0;
+ unsigned int family = 0;
+ bool arch;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = strcmp (argv[0], "arch") == 0;
+ if (!arch && strcmp (argv[0], "tune"))
+ return NULL;
+
+#ifndef __x86_64__
+ /* See if we can use cpuid. */
+ asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
+ "pushl %0; popfl; pushfl; popl %0; popfl"
+ : "=&r" (eax), "=&r" (ebx)
+ : "i" (0x00200000));
+
+ if (((eax ^ ebx) & 0x00200000) == 0)
+ goto done;
+#endif
+
+ processor = PROCESSOR_PENTIUM;
+
+ /* Check the highest input value for eax. */
+ cpuid (0, eax, ebx, ecx, edx);
+ max_level = eax;
+ /* We only look at the first four characters. */
+ vendor = ebx;
+ if (max_level == 0)
+ goto done;
+
+ cpuid (1, eax, ebx, ecx, edx);
+ has_cmpxchg8b = !!(edx & bit_CMPXCHG8B);
+ has_cmov = !!(edx & bit_CMOV);
+ has_mmx = !!(edx & bit_MMX);
+ has_sse = !!(edx & bit_SSE);
+ has_sse2 = !!(edx & bit_SSE2);
+ has_sse3 = !!(ecx & bit_SSE3);
+ /* We don't care for extended family. */
+ family = (eax >> 8) & ~(1 << 4);
+
+ cpuid (0x80000000, eax, ebx, ecx, edx);
+ ext_level = eax;
+ if (ext_level >= 0x80000000)
+ {
+ cpuid (0x80000001, eax, ebx, ecx, edx);
+ has_3dnow = !!(edx & bit_3DNOW);
+ has_3dnowp = !!(edx & bit_3DNOWP);
+ has_longmode = !!(edx & bit_LM);
+ }
+
+ is_amd = vendor == *(unsigned int*)"Auth";
+
+ if (is_amd)
+ {
+ if (has_mmx)
+ processor = PROCESSOR_K6;
+ if (has_3dnowp)
+ processor = PROCESSOR_ATHLON;
+ if (has_sse2 || has_longmode)
+ processor = PROCESSOR_K8;
+ }
+ else
+ {
+ switch (family)
+ {
+ case 5:
+ /* Default is PROCESSOR_PENTIUM. */
+ break;
+ case 6:
+ processor = PROCESSOR_PENTIUMPRO;
+ break;
+ case 15:
+ processor = PROCESSOR_PENTIUM4;
+ break;
+ default:
+ /* We have no idea. Use something reasonable. */
+ if (arch)
+ {
+ if (has_sse3)
+ {
+ if (has_longmode)
+ cpu = "nocona";
+ else
+ cpu = "prescott";
+ }
+ else if (has_sse2)
+ cpu = "pentium4";
+ else if (has_cmov)
+ cpu = "pentiumpro";
+ else if (has_mmx)
+ cpu = "pentium-mmx";
+ else if (has_cmpxchg8b)
+ cpu = "pentium";
+ else
+ cpu = "i386";
+ }
+ else
+ cpu = "generic";
+ goto done;
+ break;
+ }
+ }
+
+ switch (processor)
+ {
+ case PROCESSOR_I386:
+ cpu = "i386";
+ break;
+ case PROCESSOR_I486:
+ cpu = "i486";
+ break;
+ case PROCESSOR_PENTIUM:
+ if (has_mmx)
+ cpu = "pentium-mmx";
+ else
+ cpu = "pentium";
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ if (arch)
+ {
+ if (has_sse3)
+ {
+ if (has_longmode)
+ {
+ /* It is Core 2 Duo. */
+ cpu = "nocona";
+ }
+ else
+ {
+ /* It is Core Duo. */
+ cpu = "prescott";
+ }
+ }
+ else if (has_sse2)
+ {
+ /* It is Pentium M. */
+ cpu = "pentium4";
+ }
+ else if (has_sse)
+ {
+ /* It is Pentium III. */
+ cpu = "pentium3";
+ }
+ else if (has_mmx)
+ {
+ /* It is Pentium II. */
+ cpu = "pentium2";
+ }
+ else
+ {
+ /* Default to Pentium Pro. */
+ cpu = "pentiumpro";
+ }
+ }
+ else
+ {
+ /* For -mtune, we default to -mtune=generic. */
+ cpu = "generic";
+ }
+ break;
+ case PROCESSOR_K6:
+ if (has_3dnow)
+ cpu = "k6-3";
+ else
+ cpu = "k6";
+ break;
+ case PROCESSOR_ATHLON:
+ if (has_sse)
+ cpu = "athlon-4";
+ else
+ cpu = "athlon";
+ break;
+ case PROCESSOR_PENTIUM4:
+ if (has_sse3)
+ {
+ if (has_longmode)
+ cpu = "nocona";
+ else
+ cpu = "prescott";
+ }
+ else
+ cpu = "pentium4";
+ break;
+ case PROCESSOR_K8:
+ cpu = "k8";
+ break;
+ case PROCESSOR_NOCONA:
+ cpu = "nocona";
+ break;
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ cpu = "generic";
+ break;
+ default:
+ abort ();
+ break;
+ }
+
+done:
+ return concat ("-m", argv[0], "=", cpu, NULL);
+}
+#else
+/* If we aren't compiling with GCC we just provide a minimal
+ default value. */
+const char *host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *cpu;
+ bool arch;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = strcmp (argv[0], "arch") == 0;
+ if (!arch && strcmp (argv[0], "tune"))
+ return NULL;
+
+ if (arch)
+ {
+ /* FIXME: i386 is wrong for 64bit compiler. How can we tell if
+ we are generating 64bit or 32bit code? */
+ cpu = "i386";
+ }
+ else
+ cpu = "generic";
+
+ return concat ("-m", argv[0], "=", cpu, NULL);
+}
+#endif /* GCC_VERSION */
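As the comment above host_detect_local_cpu notes, the driver reaches this hook
through a %:local_cpu_detect(...) spec substituted for -march=native or
-mtune=native; the spec itself is defined elsewhere in the port.  A hedged
sketch of the resulting rewrites on a hypothetical Intel family-6 part with
SSE3 and long mode, i.e. the PROCESSOR_PENTIUMPRO branch above:

  -march=native  ->  host_detect_local_cpu ("arch")  returns  "-march=nocona"
  -mtune=native  ->  host_detect_local_cpu ("tune")  returns  "-mtune=generic"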
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h
new file mode 100644
index 000000000..857ea6ff9
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/emmintrin.h
@@ -0,0 +1,1981 @@
+/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */
+/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _EMMINTRIN_H_INCLUDED
+#define _EMMINTRIN_H_INCLUDED
+
+#ifdef __SSE2__
+#include <xmmintrin.h>
+
+/* SSE2 */
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef short __v8hi __attribute__ ((__vector_size__ (16)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Create a selector for use with the SHUFPD instruction. */
+#define _MM_SHUFFLE2(fp1,fp0) \
+ (((fp1) << 1) | (fp0))
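/* Illustrative note, not part of the original header: the two-bit selector
   built by _MM_SHUFFLE2 is consumed by shufpd; bit 0 chooses which element of
   the first operand becomes result element 0, and bit 1 chooses which element
   of the second operand becomes result element 1.  For example,
   _MM_SHUFFLE2 (0, 1) == 1, which _mm_loadr_pd below uses to swap the two
   doubles of a vector.  */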
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+/* APPLE LOCAL begin radar 4152603 */
+/* Create a vector with element 0 as F and the rest zero. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_sd (double __F)
+{
+ return __extension__ (__m128d){ __F, 0 };
+}
+
+/* Create a vector with both elements equal to F. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_pd (double __F)
+{
+ return __extension__ (__m128d){ __F, __F };
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_pd1 (double __F)
+{
+ return _mm_set1_pd (__F);
+}
+
+/* Create a vector with the lower value X and upper value W. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __X, __W };
+}
+
+/* Create a vector with the lower value W and upper value X. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __W, __X };
+}
+
+/* Create a vector of zeros. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setzero_pd (void)
+{
+ return __extension__ (__m128d){ 0.0, 0.0 };
+}
+
+/* Sets the low DPFP value of A from the low value of B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_move_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* Load two DPFP values from P. The address must be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_pd (double const *__P)
+{
+ return *(__m128d *)__P;
+}
+
+/* Load two DPFP values from P. The address need not be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadu_pd (double const *__P)
+{
+ return __builtin_ia32_loadupd (__P);
+}
+
+/* Create a vector with all two elements equal to *P. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load1_pd (double const *__P)
+{
+ return _mm_set1_pd (*__P);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_sd (double const *__P)
+{
+ return _mm_set_sd (*__P);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_pd1 (double const *__P)
+{
+ return _mm_load1_pd (__P);
+}
+
+/* Load two DPFP values in reverse order. The address must be aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadr_pd (double const *__P)
+{
+ __m128d __tmp = _mm_load_pd (__P);
+ return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
+}
+
+/* Store two DPFP values. The address must be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_pd (double *__P, __m128d __A)
+{
+ *(__m128d *)__P = __A;
+}
+
+/* Store two DPFP values. The address need not be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storeu_pd (double *__P, __m128d __A)
+{
+ __builtin_ia32_storeupd (__P, __A);
+}
+
+/* Stores the lower DPFP value. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_sd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE double __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsd_f64 (__m128d __A)
+{
+ return __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storel_pd (double *__P, __m128d __A)
+{
+ _mm_store_sd (__P, __A);
+}
+
+/* Stores the upper DPFP value. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storeh_pd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
+}
+
+/* Store the lower DPFP value across two words.
+ The address must be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store1_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_pd1 (double *__P, __m128d __A)
+{
+ _mm_store1_pd (__P, __A);
+}
+
+/* Store two DPFP values in reverse order. The address must be aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storer_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi128_si32 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi128_si64 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_div_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_div_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sqrt_pd (__m128d __A)
+{
+ return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+}
+
+/* Return pair {sqrt (B[0]), A[1]}. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sqrt_sd (__m128d __A, __m128d __B)
+{
+ __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+ return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_and_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_andnot_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_or_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_xor_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmple_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpneq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnlt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnle_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpngt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpunord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmple_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpge_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmplesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpneq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnlt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnle_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpngt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpnltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnge_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpnlesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpunord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+/* Create a vector of Qi, where i is the element number. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_epi64 (__m64 __q1, __m64 __q0)
+{
+ return _mm_set_epi64x ((long long)__q1, (long long)__q0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
+ short __q3, short __q2, short __q1, short __q0)
+{
+ return __extension__ (__m128i)(__v8hi){
+ __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m128i)(__v16qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+/* APPLE LOCAL begin 4220129 */
+/* functions moved to end of file */
+/* APPLE LOCAL end 4220129 */
+
+/* Create a vector of Qi, where i is the element number.
+ The parameter order is reversed from the _mm_set_epi* functions. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_epi64 (__m64 __q0, __m64 __q1)
+{
+ return _mm_set_epi64 (__q1, __q0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
+{
+ return _mm_set_epi32 (__q3, __q2, __q1, __q0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
+ short __q4, short __q5, short __q6, short __q7)
+{
+ return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
+ char __q04, char __q05, char __q06, char __q07,
+ char __q08, char __q09, char __q10, char __q11,
+ char __q12, char __q13, char __q14, char __q15)
+{
+ return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
+ __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
+}
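/* Illustrative note, not part of the original header: the _mm_setr_* variants
   simply reverse the argument order of their _mm_set_* counterparts, so
   _mm_setr_epi32 (0, 1, 2, 3) == _mm_set_epi32 (3, 2, 1, 0); both place 0 in
   element 0 and 3 in element 3.  */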
+
+/* Create a vector with element 0 as *P and the rest zero. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_si128 (__m128i const *__P)
+{
+ return *__P;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
+}
+
+/* APPLE LOCAL begin 4099020 */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadl_epi64 (__m128i const *__P)
+{
+ return (__m128i)__builtin_ia32_loadlv4si ((__v2si *)__P);
+}
+/* APPLE LOCAL end 4099020 */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+ *__P = __B;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storeu_si128 (__m128i *__P, __m128i __B)
+{
+ __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin 4099020 */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storel_epi64 (__m128i *__P, __m128i __B)
+{
+ __builtin_ia32_storelv4si ((__v2si *)__P, __B);
+}
+/* APPLE LOCAL end 4099020 */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movepi64_pi64 (__m128i __B)
+{
+ return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movpi64_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 ((__m64)0LL, __A);
+}
+
+/* APPLE LOCAL begin 4099020 */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_move_epi64 (__m128i __A)
+{
+ return (__m128i)__builtin_ia32_movqv4si ((__v4si)__A) ;
+}
+/* APPLE LOCAL end 4099020 */
+
+/* Create a vector of zeros. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setzero_si128 (void)
+{
+ return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtepi32_pd (__m128i __A)
+{
+ return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtepi32_ps (__m128i __A)
+{
+ return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpd_ps (__m128d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpi32_pd (__m64 __A)
+{
+ return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtps_pd (__m128 __A)
+{
+ return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsd_ss (__m128 __A, __m128d __B)
+{
+ return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi32_sd (__m128d __A, int __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtss_sd (__m128d __A, __m128 __B)
+{
+ return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL 5814283 */
+#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)(__A), (__v2df)(__B), (__C)))
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadh_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadl_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movemask_pd (__m128d __A)
+{
+ return __builtin_ia32_movmskpd ((__v2df)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_packs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_packs_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_packus_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_madd_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mulhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mullo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_su32 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
+}
+
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+}
+#else
+#define _mm_slli_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_psllwi128 ((__v8hi)(__A), __B))
+#define _mm_slli_epi32(__A, __B) \
+  ((__m128i)__builtin_ia32_pslldi128 ((__v4si)(__A), __B))
+#define _mm_slli_epi64(__A, __B) \
+  ((__m128i)__builtin_ia32_psllqi128 ((__v2di)(__A), __B))
+#endif
+
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srai_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srai_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+}
+#else
+#define _mm_srai_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_psrawi128 ((__v8hi)(__A), __B))
+#define _mm_srai_epi32(__A, __B) \
+  ((__m128i)__builtin_ia32_psradi128 ((__v4si)(__A), __B))
+#endif
+
+#if 0
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, int __B)
+{
+ return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8));
+}
+
+static __m128i __attribute__((__always_inline__))
+_mm_slli_si128 (__m128i __A, int __B)
+{
+ return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8));
+}
+#else
+/* APPLE LOCAL begin 5919583 */
+#define _mm_srli_si128 (__m128i)__builtin_ia32_psrldqi128_byteshift
+#define _mm_slli_si128 (__m128i)__builtin_ia32_pslldqi128_byteshift
+/* APPLE LOCAL end 5919583 */
+#endif
+
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+}
+#else
+#define _mm_srli_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_psrlwi128 ((__v8hi)(__A), __B))
+#define _mm_srli_epi32(__A, __B) \
+ ((__m128i)__builtin_ia32_psrldi128 ((__v4si)(__A), __B))
+#define _mm_srli_epi64(__A, __B) \
+  ((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), __B))
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sra_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sra_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srl_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srl_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srl_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_and_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_andnot_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_or_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_xor_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+}
+
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_extract_epi16 (__m128i const __A, int const __N)
+{
+ return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
+}
+#else
+#define _mm_extract_epi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N)))
+#define _mm_insert_epi16(A, D, N) \
+ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N)))
+#endif
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movemask_epi8 (__m128i __A)
+{
+ return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mulhi_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin 5814283 */
+#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__A), __B))
+#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__A), __B))
+#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)(__A), __B))
+/* APPLE LOCAL end 5814283 */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
+{
+ __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_avg_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_avg_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sad_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_stream_si32 (int *__A, int __B)
+{
+ __builtin_ia32_movnti (__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_stream_si128 (__m128i *__A, __m128i __B)
+{
+ __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_stream_pd (double *__A, __m128d __B)
+{
+ __builtin_ia32_movntpd (__A, (__v2df)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_clflush (void const *__A)
+{
+ __builtin_ia32_clflush (__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_lfence (void)
+{
+ __builtin_ia32_lfence ();
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mfence (void)
+{
+ __builtin_ia32_mfence ();
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi32_si128 (int __A)
+{
+ return _mm_set_epi32 (0, 0, 0, __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+#endif
+
+/* Casts between the various SP, DP and INT vector types.  Note that these
+   do no conversion of values; they just reinterpret the same bits with a
+   different type.  */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_castpd_ps(__m128d __A)
+{
+ return (__m128) __A;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_castpd_si128(__m128d __A)
+{
+ return (__m128i) __A;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_castps_pd(__m128 __A)
+{
+ return (__m128d) __A;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_castps_si128(__m128 __A)
+{
+ return (__m128i) __A;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_castsi128_ps(__m128i __A)
+{
+ return (__m128) __A;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_castsi128_pd(__m128i __A)
+{
+ return (__m128d) __A;
+}
+/* APPLE LOCAL end radar 4152603 */
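+
+/* A minimal sketch of the cast/convert distinction (assumes SSE2 and the
+   _mm_set1_epi32 and _mm_cvtepi32_ps intrinsics defined elsewhere in this
+   header):
+
+     __m128i bits  = _mm_set1_epi32 (1);
+     __m128  as_ps = _mm_castsi128_ps (bits);  // same 0x00000001 bit pattern
+                                               // in each lane (a denormal)
+     __m128  vals  = _mm_cvtepi32_ps (bits);   // converted value: 1.0f per lane
+*/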
+
+/* APPLE LOCAL begin 4220129, 4286110 */
+/* Set all of the elements of the vector to A. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_epi64x (long long __A)
+{
+ return _mm_set_epi64x (__A, __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 (__A, __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_epi32 (int __A)
+{
+ return _mm_set_epi32 (__A, __A, __A, __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_epi16 (short __A)
+{
+ __m128i temp, temp2, temp3;
+ temp = _mm_cvtsi32_si128((int)__A);
+ temp2 = _mm_unpacklo_epi16(temp, temp);
+ temp3 = _mm_shuffle_epi32(temp2, 0);
+ return temp3;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_epi8 (char __A)
+{
+ __m128i temp, temp2, temp3, temp4;
+ temp = _mm_cvtsi32_si128 ((int)__A);
+ temp2 = _mm_unpacklo_epi8 (temp, temp);
+ temp3 = _mm_unpacklo_epi8 (temp2, temp2);
+ temp4 = _mm_shuffle_epi32 (temp3, 0);
+ return temp4;
+}
+/* APPLE LOCAL end 4220129, 4286110 */
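+
+/* Rough sketch of what the broadcasts above do (assumes SSE2):
+
+     __m128i v = _mm_set1_epi16 (7);   // v = {7,7,7,7,7,7,7,7}
+
+   expands through _mm_cvtsi32_si128, _mm_unpacklo_epi16 and
+   _mm_shuffle_epi32 (roughly movd + punpcklwd + pshufd) rather than eight
+   separate element insertions.  */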
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* __SSE2__ */
+
+#endif /* _EMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h b/gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h
new file mode 100644
index 000000000..20d7f5e04
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/gmm_malloc.h
@@ -0,0 +1,77 @@
+/* Copyright (C) 2004 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+#include <errno.h>
+
+static __inline__ void*
+_mm_malloc (size_t size, size_t align)
+{
+ void * malloc_ptr;
+ void * aligned_ptr;
+
+ /* Error if align is not a power of two. */
+ if (align & (align - 1))
+ {
+ errno = EINVAL;
+ return ((void*) 0);
+ }
+
+ if (size == 0)
+ return ((void *) 0);
+
+  /* Assume the malloc'd pointer is aligned at least to sizeof (void *).
+     If necessary, raise the requested alignment so there is always room
+     to store the value returned by malloc just below the aligned block.
+     Effectively this enforces a minimum alignment of 2 * sizeof (void *). */
+ if (align < 2 * sizeof (void *))
+ align = 2 * sizeof (void *);
+
+ malloc_ptr = malloc (size + align);
+ if (!malloc_ptr)
+ return ((void *) 0);
+
+  /* Align.  We have at least sizeof (void *) bytes of space below the
+     malloc'd pointer.  */
+ aligned_ptr = (void *) (((size_t) malloc_ptr + align)
+ & ~((size_t) (align) - 1));
+
+  /* Store the original pointer just below the aligned pointer.  */
+ ((void **) aligned_ptr) [-1] = malloc_ptr;
+
+ return aligned_ptr;
+}
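+
+/* Worked sketch of the scheme above (illustrative): on a 64-bit host,
+   _mm_malloc (100, 16) keeps align == 16 (already >= 2 * sizeof (void *)),
+   calls malloc (116), rounds malloc_ptr + 16 down to a 16-byte boundary,
+   and stashes malloc_ptr in the slot just below the returned address so
+   that _mm_free can hand it back to free:
+
+     void *p = _mm_malloc (100, 16);   // ((size_t) p & 15) == 0
+     _mm_free (p);                     // releases the underlying malloc'd block
+*/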
+
+static __inline__ void
+_mm_free (void * aligned_ptr)
+{
+ if (aligned_ptr)
+ free (((void **) aligned_ptr) [-1]);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h b/gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h
new file mode 100644
index 000000000..e9a621871
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/gstabs.h
@@ -0,0 +1,7 @@
+/* We do not want to output SDB debugging information. */
+
+#undef SDB_DEBUGGING_INFO
+
+/* We want to output DBX debugging information. */
+
+#define DBX_DEBUGGING_INFO 1
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c b/gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c
new file mode 100644
index 000000000..3025bdae6
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/host-i386-darwin.c
@@ -0,0 +1,31 @@
+/* i386-darwin host-specific hook definitions.
+ Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for x86 hosts; this file exists just
+ to include config/host-darwin.h. */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def b/gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def
new file mode 100644
index 000000000..3cb4cb1b8
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386-modes.def
@@ -0,0 +1,97 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* The x86_64 ABI specifies both XF and TF modes.
+   XFmode (__float80) is IEEE extended precision; TFmode (__float128)
+   is IEEE quad precision. */
+
+FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* In ILP32 mode, XFmode has size 12 and alignment 4.
+ In LP64 mode, XFmode has size and alignment 16. */
+ADJUST_FLOAT_FORMAT (XF, (TARGET_128BIT_LONG_DOUBLE
+ ? &ieee_extended_intel_128_format
+ : TARGET_96_ROUND_53_LONG_DOUBLE
+ ? &ieee_extended_intel_96_round_53_format
+ : &ieee_extended_intel_96_format));
+ADJUST_BYTESIZE (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 12);
+ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
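+
+/* Concretely (illustrative): with the i386 default of -m96bit-long-double,
+   "long double" is the 80-bit XFmode value stored in 12 bytes with 4-byte
+   alignment; with -m128bit-long-double (the x86_64 default) the same value
+   is padded to 16 bytes with 16-byte alignment.  */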
+
+/* Add any extra modes needed to represent the condition code.
+
+ For the i386, we need separate modes when floating-point
+ equality comparisons are being done.
+
+   Add CCNO to indicate comparisons against zero that require the
+   Overflow flag to be unset.  The sign-bit test is used instead,
+   so this mode can also express "a&b>0"-style tests.
+
+   Add CCGC to indicate comparisons against zero that allow
+   unspecified garbage in the Carry flag.  This mode is used
+   by inc/dec instructions.
+
+   Add CCGOC to indicate comparisons against zero that allow
+   unspecified garbage in the Carry and Overflow flags.  This
+   mode is used to simulate comparisons of (a-b) and (a+b)
+   against zero using sub/cmp/add operations.
+
+ APPLE LOCAL begin 5612787 mainline sse4
+ Add CCA to indicate that only the Above flag is valid.
+ Add CCC to indicate that only the Carry flag is valid.
+ Add CCO to indicate that only the Overflow flag is valid.
+ Add CCS to indicate that only the Sign flag is valid.
+ APPLE LOCAL end 5612787 mainline sse4
+ Add CCZ to indicate that only the Zero flag is valid. */
+
+CC_MODE (CCGC);
+CC_MODE (CCGOC);
+CC_MODE (CCNO);
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+CC_MODE (CCA);
+CC_MODE (CCC);
+CC_MODE (CCO);
+CC_MODE (CCS);
+/* APPLE LOCAL end 5612787 mainline sse4 */
+CC_MODE (CCZ);
+CC_MODE (CCFP);
+CC_MODE (CCFPU);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+/* APPLE LOCAL 5612787 mainline sse4 */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+/* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+VECTOR_MODE (INT, DI, 1);                     /* V1DI (__m64) */
+VECTOR_MODE (INT, SI, 1); /* V1SI */
+/* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+/* The symbol Pmode stands for one of the above machine modes (usually SImode).
+ The tm.h file specifies which one. It is not a distinct mode. */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h b/gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h
new file mode 100644
index 000000000..f92428e55
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386-protos.h
@@ -0,0 +1,261 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* Functions in i386.c */
+extern void override_options (void);
+extern void optimization_options (int, int);
+
+extern int ix86_can_use_return_insn_p (void);
+extern int ix86_frame_pointer_required (void);
+extern void ix86_setup_frame_addresses (void);
+
+extern void ix86_file_end (void);
+extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
+extern void ix86_expand_prologue (void);
+extern void ix86_expand_epilogue (int);
+
+extern void ix86_output_addr_vec_elt (FILE *, int);
+extern void ix86_output_addr_diff_elt (FILE *, int, int);
+
+#ifdef RTX_CODE
+extern int ix86_aligned_p (rtx);
+
+extern int standard_80387_constant_p (rtx);
+extern const char *standard_80387_constant_opcode (rtx);
+extern rtx standard_80387_constant_rtx (int);
+extern int standard_sse_constant_p (rtx);
+extern const char *standard_sse_constant_opcode (rtx, rtx);
+extern int symbolic_reference_mentioned_p (rtx);
+extern bool extended_reg_mentioned_p (rtx);
+extern bool x86_extended_QIreg_mentioned_p (rtx);
+extern bool x86_extended_reg_mentioned_p (rtx);
+extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
+
+extern int ix86_expand_movmem (rtx, rtx, rtx, rtx);
+extern int ix86_expand_clrmem (rtx, rtx, rtx);
+extern int ix86_expand_strlen (rtx, rtx, rtx, rtx);
+
+extern bool legitimate_constant_p (rtx);
+extern bool constant_address_p (rtx);
+extern bool legitimate_pic_operand_p (rtx);
+extern int legitimate_pic_address_disp_p (rtx);
+extern int legitimate_address_p (enum machine_mode, rtx, int);
+extern rtx legitimize_address (rtx, rtx, enum machine_mode);
+
+extern void print_reg (rtx, int, FILE*);
+extern void print_operand (FILE*, rtx, int);
+extern void print_operand_address (FILE*, rtx);
+extern bool output_addr_const_extra (FILE*, rtx);
+
+extern void split_di (rtx[], int, rtx[], rtx[]);
+extern void split_ti (rtx[], int, rtx[], rtx[]);
+
+extern const char *output_set_got (rtx, rtx);
+extern const char *output_387_binary_op (rtx, rtx*);
+extern const char *output_387_reg_move (rtx, rtx*);
+extern const char *output_fix_trunc (rtx, rtx*, int);
+extern const char *output_fp_compare (rtx, rtx*, int, int);
+
+extern void ix86_expand_clear (rtx);
+extern void ix86_expand_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
+extern void ix86_expand_push (enum machine_mode, rtx);
+extern rtx ix86_fixup_binary_operands (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern void ix86_expand_binary_operator (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
+ rtx[]);
+/* APPLE LOCAL begin 4176531 4424891 */
+extern const char *ix86_expand_convert_uns_DF2SI_sse (rtx *);
+extern const char *ix86_expand_convert_uns_SF2SI_sse (rtx *);
+extern const char *ix86_expand_convert_uns_DI2DF_sse (rtx *);
+extern const char *ix86_expand_convert_uns_SI2DF_sse (rtx *);
+extern const char *ix86_expand_convert_sign_DI2DF_sse (rtx *);
+/* APPLE LOCAL end 4176531 4424891 */
+extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
+extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
+ rtx[]);
+extern void ix86_expand_copysign (rtx []);
+extern void ix86_split_copysign_const (rtx []);
+extern void ix86_split_copysign_var (rtx []);
+extern int ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern int ix86_match_ccmode (rtx, enum machine_mode);
+extern rtx ix86_expand_compare (enum rtx_code, rtx *, rtx *);
+extern int ix86_use_fcomi_compare (enum rtx_code);
+extern void ix86_expand_branch (enum rtx_code, rtx);
+extern int ix86_expand_setcc (enum rtx_code, rtx);
+extern int ix86_expand_int_movcc (rtx[]);
+extern int ix86_expand_fp_movcc (rtx[]);
+extern bool ix86_expand_fp_vcond (rtx[]);
+extern bool ix86_expand_int_vcond (rtx[]);
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
+/* APPLE LOCAL end 5612787 mainline sse4 */
+extern int ix86_expand_int_addcc (rtx[]);
+extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern void x86_initialize_trampoline (rtx, rtx, rtx);
+extern rtx ix86_zero_extend_to_Pmode (rtx);
+extern void ix86_split_long_move (rtx[]);
+extern void ix86_split_ashl (rtx *, rtx, enum machine_mode);
+extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
+extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
+extern rtx ix86_find_base_term (rtx);
+extern int ix86_check_movabs (rtx, int);
+
+extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
+extern int ix86_attr_length_immediate_default (rtx, int);
+extern int ix86_attr_length_address_default (rtx);
+
+extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
+
+extern rtx ix86_libcall_value (enum machine_mode);
+extern bool ix86_function_value_regno_p (int);
+extern bool ix86_function_arg_regno_p (int);
+extern int ix86_function_arg_boundary (enum machine_mode, tree);
+extern int ix86_return_in_memory (tree);
+/* APPLE LOCAL radar 4781080 */
+extern bool ix86_objc_fpreturn_msgcall (tree, bool);
+extern void ix86_va_start (tree, rtx);
+extern rtx ix86_va_arg (tree, tree);
+
+extern rtx ix86_force_to_memory (enum machine_mode, rtx);
+extern void ix86_free_from_memory (enum machine_mode);
+extern void ix86_split_fp_branch (enum rtx_code code, rtx, rtx,
+ rtx, rtx, rtx, rtx);
+extern bool ix86_hard_regno_mode_ok (int, enum machine_mode);
+extern bool ix86_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern int ix86_register_move_cost (enum machine_mode, enum reg_class,
+ enum reg_class);
+extern int ix86_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode, int);
+extern bool ix86_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode, enum reg_class);
+extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
+extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
+extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
+extern int ix86_mode_needed (int, rtx);
+extern void emit_i387_cw_initialization (int);
+extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
+extern void x86_order_regs_for_local_alloc (void);
+extern void x86_function_profiler (FILE *, int);
+extern void x86_emit_floatuns (rtx [2]);
+extern void ix86_emit_fp_unordered_jump (rtx);
+
+extern void ix86_emit_i387_log1p (rtx, rtx);
+
+extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
+extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int);
+extern rtx ix86_function_value (tree, tree, bool);
+#endif
+
+#endif
+
+#ifdef TREE_CODE
+extern int ix86_return_pops_args (tree, tree, int);
+
+extern int ix86_data_alignment (tree, int);
+extern int ix86_local_alignment (tree, int);
+extern int ix86_constant_alignment (tree, int);
+extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *);
+extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *);
+
+extern unsigned int i386_pe_section_type_flags (tree, const char *, int);
+extern void i386_pe_asm_named_section (const char *, unsigned int, tree);
+extern int x86_field_alignment (tree, int);
+#endif
+
+extern rtx ix86_tls_get_addr (void);
+extern rtx ix86_tls_module_base (void);
+
+extern void ix86_expand_vector_init (bool, rtx, rtx);
+extern void ix86_expand_vector_set (bool, rtx, rtx, int);
+extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
+extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
+
+/* In winnt.c */
+extern int i386_pe_dllexport_name_p (const char *);
+extern int i386_pe_dllimport_name_p (const char *);
+extern void i386_pe_unique_section (tree, int);
+extern void i386_pe_declare_function_type (FILE *, const char *, int);
+extern void i386_pe_record_external_function (tree, const char *);
+extern void i386_pe_record_exported_symbol (const char *, int);
+extern void i386_pe_asm_file_end (FILE *);
+extern void i386_pe_encode_section_info (tree, rtx, int);
+extern const char *i386_pe_strip_name_encoding (const char *);
+extern const char *i386_pe_strip_name_encoding_full (const char *);
+extern void i386_pe_output_labelref (FILE *, const char *);
+extern bool i386_pe_valid_dllimport_attribute_p (tree);
+
+/* In winnt-cxx.c and winnt-stubs.c */
+extern void i386_pe_adjust_class_at_definition (tree);
+extern bool i386_pe_type_dllimport_p (tree);
+extern bool i386_pe_type_dllexport_p (tree);
+
+extern rtx maybe_get_pool_constant (rtx);
+
+extern char internal_label_prefix[16];
+extern int internal_label_prefix_len;
+
+enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS };
+struct ix86_address
+{
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
+};
+
+extern int ix86_decompose_address (rtx, struct ix86_address *);
+extern int memory_address_length (rtx addr);
+extern void x86_output_aligned_bss (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT, int);
+extern void x86_elf_aligned_common (FILE *, const char *,
+ unsigned HOST_WIDE_INT, int);
+
+#ifdef RTX_CODE
+extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
+ enum rtx_code *, enum rtx_code *);
+extern enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
+#endif
+
+/* APPLE LOCAL begin CW asm blocks */
+extern const char *i386_iasm_register_name (const char *regname, char *buf);
+extern bool iasm_x86_needs_swapping (const char *);
+extern bool iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses,
+ bool must_be_reg, bool must_not_be_reg, void *);
+extern void iasm_x86_print_prefix (char *buf, tree prefix_list);
+extern tree iasm_raise_reg (tree);
+/* APPLE LOCAL end CW asm blocks */
+
+/* APPLE LOCAL 3399553 */
+extern void ix86_expand_flt_rounds (rtx);
+extern int asm_preferred_eh_data_format (int, int);
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.c b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c
new file mode 100644
index 000000000..0e212967a
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c
@@ -0,0 +1,23515 @@
+/* Subroutines used for code generation on IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "real.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-codes.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "except.h"
+#include "function.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "cgraph.h"
+#include "tree-gimple.h"
+#include "dwarf2.h"
+#include "tm-constrs.h"
+
+/* APPLE LOCAL begin pascal strings */
+#include "../../libcpp/internal.h"
+extern struct cpp_reader* parse_in;
+/* APPLE LOCAL end pascal strings */
+/* APPLE LOCAL begin regparmandstackparm */
+#include "integrate.h"
+#include "tree-inline.h"
+#include "splay-tree.h"
+#include "tree-pass.h"
+#include "c-tree.h"
+#include "c-common.h"
+/* APPLE LOCAL end regparmandstackparm */
+/* APPLE LOCAL begin dwarf call/pop 5221468 */
+#include "debug.h"
+#include "dwarf2out.h"
+/* APPLE LOCAL end dwarf call/pop 5221468 */
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT (-1)
+#endif
+
+/* Return index of given mode in mult and division cost tables. */
+#define MODE_INDEX(mode) \
+ ((mode) == QImode ? 0 \
+ : (mode) == HImode ? 1 \
+ : (mode) == SImode ? 2 \
+ : (mode) == DImode ? 3 \
+ : 4)
+
+/* Processor costs (relative to an add) */
+/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
+#define COSTS_N_BYTES(N) ((N) * 2)
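+
+/* Under that assumption, COSTS_N_BYTES (2) == COSTS_N_INSNS (1), i.e. when
+   tuning for size a two-byte instruction is charged the same as a single
+   addition.  */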
+
+static const
+struct processor_costs size_cost = { /* costs for tuning for size */
+ COSTS_N_BYTES (2), /* cost of an add instruction */
+ COSTS_N_BYTES (3), /* cost of a lea instruction */
+ COSTS_N_BYTES (2), /* variable shift costs */
+ COSTS_N_BYTES (3), /* constant shift costs */
+ {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* cost of movsx */
+ COSTS_N_BYTES (3), /* cost of movzx */
+ 0, /* "large" insn */
+ 2, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, /* cost of moving SSE register */
+ {3, 3, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {3, 3, 3}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_BYTES (2), /* cost of FMUL instruction. */
+ COSTS_N_BYTES (2), /* cost of FDIV instruction. */
+ COSTS_N_BYTES (2), /* cost of FABS instruction. */
+ COSTS_N_BYTES (2), /* cost of FCHS instruction. */
+ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
+};
+
+/* Processor costs (relative to an add) */
+static const
+struct processor_costs i386_cost = { /* 386 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (1), /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (27), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (88), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (22), /* cost of FABS instruction. */
+ COSTS_N_INSNS (24), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs i486_cost = { /* 486 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
+ 1, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (16), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (73), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs pentium_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs pentiumpro_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 32, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs k6_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (2), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 6, /* MMX or SSE register to integer */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs athlon_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (24), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs k8_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs pentium4_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 12, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 10, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (43), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs nocona_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 3, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 6, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {12, 12, 12}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 8, /* MMX or SSE register to integer */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
+};
+/* APPLE LOCAL begin mainline */
+static const
+struct processor_costs core2_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (22), /* HI */
+ COSTS_N_INSNS (22), /* SI */
+ COSTS_N_INSNS (22), /* DI */
+ COSTS_N_INSNS (22)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 16, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {6, 6, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (32), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
+};
+/* APPLE LOCAL end mainline */
+/* Generic64 should produce code tuned for Nocona and K8. */
+static const
+struct processor_costs generic64_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ /* On all chips taken into consideration, lea is 2 cycles or more.  With
+    that cost, however, our current implementation of synth_mult ends up
+    using unnecessary temporary registers, causing regressions on several
+    SPECfp benchmarks.  */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
+    value is increased to the perhaps more appropriate value of 5.  */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+};
+
+/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
+static const
+struct processor_costs generic32_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+};
+
+const struct processor_costs *ix86_cost = &pentium_cost;
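The cost tables above are reached through this ix86_cost pointer whenever the backend needs an instruction-cost estimate, and COSTS_N_INSNS simply scales an instruction count into the middle end's cost units. A minimal standalone sketch, assuming the conventional rtl.h definition of COSTS_N_INSNS (it is defined elsewhere in the tree, not in this hunk):

    /* Standalone illustration of the cost units; COSTS_N_INSNS is assumed to
       have its usual rtl.h definition.  */
    #include <stdio.h>

    #define COSTS_N_INSNS(N) ((N) * 4)

    int main (void)
    {
      /* pentium_cost above charges COSTS_N_INSNS (1) for an add and
         COSTS_N_INSNS (25) for a divide/mod, i.e. 4 and 100 raw cost units.  */
      printf ("add = %d, divide = %d\n", COSTS_N_INSNS (1), COSTS_N_INSNS (25));
      return 0;
    }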
+
+/* Processor feature/optimization bitmasks. */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+/* APPLE LOCAL mainline */
+#define m_CORE2 (1<<PROCESSOR_CORE2)
+#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
+#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
+#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
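The tuning flags defined below are bit vectors indexed by these processor masks; elsewhere in the port (in i386.h) each flag is normally tested against the mask of the active -mtune processor. A rough sketch of that consumer pattern, with the TUNEMASK and TARGET_USE_LEAVE spellings assumed rather than taken from this hunk:

    /* Sketch of the usual i386.h consumer side (names assumed).  */
    #define TUNEMASK (1 << ix86_tune)                    /* active tuning target */
    #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

    /* For example, with ix86_tune == PROCESSOR_K8, TARGET_USE_LEAVE is nonzero
       because x86_use_leave below includes m_ATHLON_K8.  */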
+
+/* Generic instruction choice should be a common subset of the supported CPUs
+ (PPro/PENT4/NOCONA/Athlon/K8). */
+
+/* Using leave does not hurt Nocona SPEC2000 results, so enabling it for
+ Generic64 seems like a good code-size tradeoff.  We can't enable it for
+ 32-bit generic because it does not work well with PPro-based chips. */
+/* APPLE LOCAL begin mainline */
+const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
+const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_zero_extend_with_and = m_486 | m_PENT;
+const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC /* m_386 | m_K6 */;
+const int x86_double_with_add = ~m_386;
+const int x86_use_bit_test = m_386;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
+const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
+const int x86_3dnow_a = m_ATHLON_K8;
+/* APPLE LOCAL end mainline */
+const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
+/* Branch hints were added to P4 based on simulation results, but after P4
+ shipped no performance benefit was observed from them, and they also
+ increase code size.  As a result, icc never generates branch hints. */
+const int x86_branch_hints = 0;
+const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
+/* We probably ought to watch for partial register stalls on the Generic32
+ compilation setting as well.  However, in the current implementation the
+ partial register stalls are not eliminated very well: they can be
+ introduced via subregs synthesized by combine and can happen in
+ caller/callee saving sequences.
+ Because this option pays back little on PPro-based chips and conflicts
+ with the partial register dependencies used by Athlon/P4-based chips, it
+ is better to leave it off for generic32 for now. */
+const int x86_partial_reg_stall = m_PPRO;
+/* APPLE LOCAL begin mainline */
+const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
+const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
+const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
+const int x86_use_mov0 = m_K6;
+const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
+const int x86_read_modify_write = ~m_PENT;
+const int x86_read_modify = ~(m_PENT | m_PPRO);
+const int x86_split_long_moves = m_PPRO;
+const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
+const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
+const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
+const int x86_qimode_math = ~(0);
+const int x86_promote_qi_regs = 0;
+/* On PPro this flag is meant to avoid partial register stalls.  Just like
+ x86_partial_reg_stall, this option might be considered for Generic32 if
+ our scheme for avoiding partial stalls were more effective. */
+const int x86_himode_math = ~(m_PPRO);
+const int x86_promote_hi_regs = m_PPRO;
+const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 |m_GENERIC;
+const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC);
+const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_shift1 = ~m_486;
+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+/* In the generic model we have a conflict between PPro/Pentium4-based chips,
+ which treat 128-bit SSE registers as single units, and K8-based chips,
+ which split SSE registers into two 64-bit halves.
+ x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
+ to allow register renaming on 128-bit SSE units, but this usually costs one
+ extra micro-op on 64-bit SSE units.  Experimental results show that
+ disabling this option on P4 brings over a 20% SPECfp regression, while
+ enabling it on K8 brings roughly a 2.4% regression that can be partly
+ masked by careful scheduling of moves. */
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
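As a concrete illustration of the partial-register writes being traded off here (a sketch, not part of the patch): scalar SSE instructions such as cvtsi2sd, or a movsd load, write only the low 64 bits of their destination xmm register, so the result carries a false dependence on whatever last wrote the upper half; promoting the destination to a full 128-bit access breaks that chain, at the cost of an extra micro-op on cores that split xmm registers into 64-bit halves.

    /* Illustration only: this conversion compiles to cvtsi2sd, which updates
       just the low half of an xmm register; fully writing (or first clearing)
       the register removes the false dependence on its previous contents.  */
    double int_to_double (int i)
    {
      return (double) i;
    }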
+/* Set for machines where types and dependencies are resolved on SSE
+ register parts instead of whole registers, so we may maintain just the
+ lower part of scalar values in the proper format, leaving the upper part
+ undefined. */
+const int x86_sse_split_regs = m_ATHLON_K8;
+const int x86_sse_typeless_stores = m_ATHLON_K8;
+const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
+const int x86_use_ffreep = m_ATHLON_K8;
+const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
+const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
+
+/* ??? Allowing interunit moves makes it all too easy for the compiler to put
+ integer data in xmm registers, which results in pretty abysmal code. */
+/* APPLE LOCAL 5612787 mainline sse4 */
+const int x86_inter_unit_moves = ~(m_ATHLON_K8);
+
+const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
+/* Some CPU cores are not able to predict more than 4 branch instructions in
+ the 16 byte window. */
+const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_CORE2 | m_GENERIC;
+const int x86_use_bt = m_ATHLON_K8;
+/* APPLE LOCAL begin */
+/* See comment in darwin override options for what needs fixing.
+ Most of this code has been rewritten in mainline anyhow.
+ All we've done here is remove the const since we assign to
+ them in SUBTARGET_OVERRIDE_OPTIONS. */
+/* Compare and exchange was added for 80486. */
+int x86_cmpxchg = ~m_386;
+/* Compare and exchange 8 bytes was added for pentium. */
+int x86_cmpxchg8b = ~(m_386 | m_486);
+/* Compare and exchange 16 bytes was added for nocona. */
+/* APPLE LOCAL mainline */
+int x86_cmpxchg16b = m_NOCONA | m_CORE2;
+/* Exchange and add was added for 80486. */
+int x86_xadd = ~m_386;
+/* APPLE LOCAL begin mainline bswap */
+/* Byteswap was added for 80486. */
+int x86_bswap = ~m_386;
+/* APPLE LOCAL end mainline bswap */
+/* APPLE LOCAL end */
+const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
+/* APPLE LOCAL end mainline */
+
+/* If the average insn count for a single function invocation is lower than
+ this constant, emit fast (but longer) prologue and epilogue code. */
+#define FAST_PROLOGUE_INSN_COUNT 20
+
+/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
+static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
+static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
+static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
+
+/* Array of the smallest class containing reg number REGNO, indexed by
+ REGNO. Used by REGNO_REG_CLASS in i386.h. */
+
+enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
+{
+ /* ax, dx, cx, bx */
+ AREG, DREG, CREG, BREG,
+ /* si, di, bp, sp */
+ SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
+ /* FP registers */
+ FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
+ FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
+ /* arg pointer */
+ NON_Q_REGS,
+ /* flags, fpsr, dirflag, frame */
+ NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
+ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+ MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
+ MMX_REGS, MMX_REGS,
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+};
+
+/* The "default" register map used in 32bit mode. */
+
+int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
+ 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
+
+static int const x86_64_int_parameter_registers[6] =
+{
+ 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
+ FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
+};
+
+static int const x86_64_int_return_registers[4] =
+{
+ 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
+};
+
+/* The "default" register map used in 64bit mode. */
+int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
+ 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
+ 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
+ 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
+ 8,9,10,11,12,13,14,15, /* extended integer registers */
+ 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
+};
+
+/* Define the register numbers to be used in Dwarf debugging information.
+ The SVR4 reference port C compiler uses the following register numbers
+ in its Dwarf output code:
+ 0 for %eax (gcc regno = 0)
+ 1 for %ecx (gcc regno = 2)
+ 2 for %edx (gcc regno = 1)
+ 3 for %ebx (gcc regno = 3)
+ 4 for %esp (gcc regno = 7)
+ 5 for %ebp (gcc regno = 6)
+ 6 for %esi (gcc regno = 4)
+ 7 for %edi (gcc regno = 5)
+ The following three DWARF register numbers are never generated by
+ the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
+ believes these numbers have these meanings.
+ 8 for %eip (no gcc equivalent)
+ 9 for %eflags (gcc regno = 17)
+ 10 for %trapno (no gcc equivalent)
+ It is not at all clear how we should number the FP stack registers
+ for the x86 architecture. If the version of SDB on x86/svr4 were
+ a bit less brain dead with respect to floating-point then we would
+ have a precedent to follow with respect to DWARF register numbers
+ for x86 FP registers, but the SDB on x86/svr4 is so completely
+ broken with respect to FP registers that it is hardly worth thinking
+ of it as something to strive for compatibility with.
+ The version of x86/svr4 SDB I have at the moment does (partially)
+ seem to believe that DWARF register number 11 is associated with
+ the x86 register %st(0), but that's about all. Higher DWARF
+ register numbers don't seem to be associated with anything in
+ particular, and even for DWARF regno 11, SDB only seems to under-
+ stand that it should say that a variable lives in %st(0) (when
+ asked via an `=' command) if we said it was in DWARF regno 11,
+ but SDB still prints garbage when asked for the value of the
+ variable in question (via a `/' command).
+ (Also note that the labels SDB prints for various FP stack regs
+ when doing an `x' command are all wrong.)
+ Note that these problems generally don't affect the native SVR4
+ C compiler because it doesn't allow the use of -O with -g and
+ because when it is *not* optimizing, it allocates a memory
+ location for each floating-point variable, and the memory
+ location is what gets described in the DWARF AT_location
+ attribute for the variable in question.
+ Regardless of the severe mental illness of the x86/svr4 SDB, we
+ do something sensible here and we use the following DWARF
+ register numbers. Note that these are all stack-top-relative
+ numbers.
+ 11 for %st(0) (gcc regno = 8)
+ 12 for %st(1) (gcc regno = 9)
+ 13 for %st(2) (gcc regno = 10)
+ 14 for %st(3) (gcc regno = 11)
+ 15 for %st(4) (gcc regno = 12)
+ 16 for %st(5) (gcc regno = 13)
+ 17 for %st(6) (gcc regno = 14)
+ 18 for %st(7) (gcc regno = 15)
+*/
+int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
+ 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
+ -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
+
+/* Test and compare insns in i386.md store the information needed to
+ generate branch and scc insns here. */
+
+rtx ix86_compare_op0 = NULL_RTX;
+rtx ix86_compare_op1 = NULL_RTX;
+rtx ix86_compare_emitted = NULL_RTX;
+
+/* Size of the register save area. */
+#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
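With the usual 64-bit values of these macros (REGPARM_MAX of 6, SSE_REGPARM_MAX of 8 and UNITS_PER_WORD of 8, all defined elsewhere and assumed here), the save area works out to a fixed 6*8 + 8*16 = 176 bytes:

    /* Sketch, assuming REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and
       UNITS_PER_WORD == 8 as defined elsewhere for 64-bit targets.  */
    enum { x86_64_varargs_size_example = 6 * 8 + 8 * 16 };   /* 176 bytes */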
+
+/* Define the structure for the machine field in struct function. */
+
+struct stack_local_entry GTY(())
+{
+ unsigned short mode;
+ unsigned short n;
+ rtx rtl;
+ struct stack_local_entry *next;
+};
+
+/* Structure describing stack frame layout.
+ Stack grows downward:
+
+ [arguments]
+ <- ARG_POINTER
+ saved pc
+
+ saved frame pointer if frame_pointer_needed
+ <- HARD_FRAME_POINTER
+ [saved regs]
+
+ [padding1] \
+ )
+ [va_arg registers] (
+ > to_allocate <- FRAME_POINTER
+ [frame] (
+ )
+ [padding2] /
+ */
+struct ix86_frame
+{
+ int nregs;
+ int padding1;
+ int va_arg_size;
+ HOST_WIDE_INT frame;
+ int padding2;
+ int outgoing_arguments_size;
+ int red_zone_size;
+
+ HOST_WIDE_INT to_allocate;
+ /* The offsets relative to ARG_POINTER. */
+ HOST_WIDE_INT frame_pointer_offset;
+ HOST_WIDE_INT hard_frame_pointer_offset;
+ HOST_WIDE_INT stack_pointer_offset;
+
+ /* When save_regs_using_mov is set, emit prologue using
+ move instead of push instructions. */
+ bool save_regs_using_mov;
+};
+
+/* Code model option. */
+enum cmodel ix86_cmodel;
+/* Asm dialect. */
+enum asm_dialect ix86_asm_dialect = ASM_ATT;
+/* TLS dialects. */
+enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
+
+/* Which unit we are generating floating point math for. */
+enum fpmath_unit ix86_fpmath;
+
+/* Which CPU we are scheduling for. */
+enum processor_type ix86_tune;
+/* Which instruction set architecture to use. */
+enum processor_type ix86_arch;
+
+/* True if the SSE prefetch instruction is not a NOP. */
+int x86_prefetch_sse;
+
+/* ix86_regparm_string as a number */
+static int ix86_regparm;
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* True if the SSE population count insn is supported. */
+int x86_popcnt;
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* -mstackrealign option */
+extern int ix86_force_align_arg_pointer;
+static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
+
+/* Preferred alignment for stack boundary in bits. */
+unsigned int ix86_preferred_stack_boundary;
+/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
+unsigned int ix86_save_preferred_stack_boundary;
+/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
+
+/* Values 1-5: see jump.c */
+int ix86_branch_cost;
+
+/* Variables which are this size or smaller are put in the data/bss
+ or ldata/lbss sections. */
+
+int ix86_section_threshold = 65536;
+
+/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
+char internal_label_prefix[16];
+int internal_label_prefix_len;
+
+static bool ix86_handle_option (size_t, const char *, int);
+static void output_pic_addr_const (FILE *, rtx, int);
+static void put_condition_code (enum rtx_code, enum machine_mode,
+ int, int, FILE *);
+static const char *get_some_local_dynamic_name (void);
+static int get_some_local_dynamic_name_1 (rtx *, void *);
+static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
+static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
+ rtx *);
+static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
+static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
+ enum machine_mode);
+static rtx get_thread_pointer (int);
+static rtx legitimize_tls_address (rtx, enum tls_model, int);
+static void get_pc_thunk_name (char [32], unsigned int);
+static rtx gen_push (rtx);
+static int ix86_flags_dependent (rtx, rtx, enum attr_type);
+static int ix86_agi_dependent (rtx, rtx, enum attr_type);
+static struct machine_function * ix86_init_machine_status (void);
+static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
+static int ix86_nsaved_regs (void);
+static void ix86_emit_save_regs (void);
+static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
+static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
+static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static HOST_WIDE_INT ix86_GOT_alias_set (void);
+static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
+static rtx ix86_expand_aligntest (rtx, int);
+static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
+static int ix86_issue_rate (void);
+static int ix86_adjust_cost (rtx, rtx, rtx, int);
+static int ia32_multipass_dfa_lookahead (void);
+static void ix86_init_mmx_sse_builtins (void);
+static rtx x86_this_parameter (tree);
+static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
+static void x86_file_start (void);
+static void ix86_reorg (void);
+static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
+static tree ix86_build_builtin_va_list (void);
+static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
+static bool ix86_scalar_mode_supported_p (enum machine_mode);
+static bool ix86_vector_mode_supported_p (enum machine_mode);
+
+static int ix86_address_cost (rtx);
+static bool ix86_cannot_force_const_mem (rtx);
+static rtx ix86_delegitimize_address (rtx);
+
+static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+
+struct builtin_description;
+static rtx ix86_expand_sse_comi (const struct builtin_description *,
+ tree, rtx);
+static rtx ix86_expand_sse_compare (const struct builtin_description *,
+ tree, rtx);
+static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
+static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
+static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx ix86_expand_store_builtin (enum insn_code, tree);
+static rtx safe_vector_operand (rtx, enum machine_mode);
+static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
+static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
+static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
+static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
+static int ix86_fp_comparison_cost (enum rtx_code code);
+static unsigned int ix86_select_alt_pic_regnum (void);
+static int ix86_save_reg (unsigned int, int);
+static void ix86_compute_frame_layout (struct ix86_frame *);
+static int ix86_comp_type_attributes (tree, tree);
+static int ix86_function_regparm (tree, tree);
+const struct attribute_spec ix86_attribute_table[];
+static bool ix86_function_ok_for_sibcall (tree, tree);
+static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
+static int ix86_value_regno (enum machine_mode, tree, tree);
+static bool contains_128bit_aligned_vector_p (tree);
+static rtx ix86_struct_value_rtx (tree, int);
+static bool ix86_ms_bitfield_layout_p (tree);
+static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
+static int extended_reg_mentioned_1 (rtx *, void *);
+static bool ix86_rtx_costs (rtx, int, int, int *);
+static int min_insn_size (rtx);
+static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
+static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
+static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static void ix86_init_builtins (void);
+static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+/* APPLE LOCAL mangle_type 7105099 */
+static const char *ix86_mangle_type (tree);
+static tree ix86_stack_protect_fail (void);
+static rtx ix86_internal_arg_pointer (void);
+static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
+
+/* This function is only used on Solaris. */
+static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
+ ATTRIBUTE_UNUSED;
+
+/* Register class used for passing a given 64-bit part of the argument.
+ These represent the classes documented by the psABI, with the exception
+ of the SSESF and SSEDF classes, which are basically the SSE class; gcc
+ just uses an SFmode or DFmode move instead of DImode to avoid
+ reformatting penalties.
+
+ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (the upper half then contains only padding).
+ */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+static const char * const x86_64_reg_class_name[] = {
+ "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
+ "sseup", "x87", "x87up", "cplx87", "no"
+};
+
+#define MAX_CLASSES 4
+
+/* Table of constants used by fldpi, fldln2, etc.... */
+static REAL_VALUE_TYPE ext_80387_constants_table [5];
+static bool ext_80387_constants_init = 0;
+static void init_ext_80387_constants (void);
+static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
+static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
+static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
+static section *x86_64_elf_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+ ATTRIBUTE_UNUSED;
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+# undef TARGET_MERGE_DECL_ATTRIBUTES
+# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#endif
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS ix86_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
+
+#undef TARGET_ENCODE_SECTION_INFO
+#ifndef SUBTARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
+#else
+#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
+#endif
+
+#undef TARGET_ASM_OPEN_PAREN
+#define TARGET_ASM_OPEN_PAREN ""
+#undef TARGET_ASM_CLOSE_PAREN
+#define TARGET_ASM_CLOSE_PAREN ""
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
+#ifdef ASM_QUAD
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
+#endif
+
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ ia32_multipass_dfa_lookahead
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
+
+#if TARGET_MACHO
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
+#endif
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START x86_file_start
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT \
+ | TARGET_64BIT_DEFAULT \
+ | TARGET_SUBTARGET_DEFAULT \
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION ix86_handle_option
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS ix86_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST ix86_address_cost
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
+#undef TARGET_CC_MODES_COMPATIBLE
+#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
+#undef TARGET_INTERNAL_ARG_POINTER
+#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
+#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
+#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
+#endif
+
+#ifdef SUBTARGET_INSERT_ATTRIBUTES
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
+#endif
+
+/* APPLE LOCAL begin mangle_type 7105099 */
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE ix86_mangle_type
+/* APPLE LOCAL end mangle_type 7105099 */
+
+#undef TARGET_STACK_PROTECT_FAIL
+#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE ix86_function_value
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#ifndef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+#endif
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
+{
+ switch (code)
+ {
+ case OPT_m3dnow:
+ if (!value)
+ {
+ target_flags &= ~MASK_3DNOW_A;
+ target_flags_explicit |= MASK_3DNOW_A;
+ }
+ return true;
+
+ case OPT_mmmx:
+ if (!value)
+ {
+ target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
+ target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
+ }
+ return true;
+
+ case OPT_msse:
+ if (!value)
+ {
+ target_flags &= ~(MASK_SSE2 | MASK_SSE3);
+ target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
+ }
+ return true;
+
+ case OPT_msse2:
+ if (!value)
+ {
+ target_flags &= ~MASK_SSE3;
+ target_flags_explicit |= MASK_SSE3;
+ }
+ return true;
+
+ default:
+ return true;
+ }
+}
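In practice the cascading above means that, for example, -mno-sse also clears the SSE2 and SSE3 mask bits (and marks them as explicitly set), and -mno-mmx likewise drops the 3DNow! bits, while enabling an option never implicitly turns on the larger feature sets handled here.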
+
+/* APPLE LOCAL begin 4760857 optimization pragmas. */
+/* Hoisted so it can be used by reset_optimization_options. */
+static struct ptt
+ {
+ const struct processor_costs *cost; /* Processor costs */
+ const int target_enable; /* Target flags to enable. */
+ const int target_disable; /* Target flags to disable. */
+ const int align_loop; /* Default alignments. */
+ const int align_loop_max_skip;
+ const int align_jump;
+ const int align_jump_max_skip;
+ const int align_func;
+ }
+const processor_target_table[PROCESSOR_max] =
+ {
+ {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
+ {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
+ {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
+ {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
+ {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
+ {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
+ /* APPLE LOCAL mainline */
+ {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
+ };
+/* APPLE LOCAL end 4760857 optimization pragmas. */
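Each row above pairs a cost table with default code-alignment parameters for that processor; override_options below selects the row for the chosen -mtune target, roughly along these lines (a sketch; align_loops and friends are the generic alignment variables assumed from the rest of the compiler, not shown in this hunk):

    /* Sketch of how a processor_target_table row is typically consumed.  */
    ix86_cost = processor_target_table[ix86_tune].cost;
    if (align_loops == 0)          /* not overridden by -falign-loops */
      align_loops = processor_target_table[ix86_tune].align_loop;
    if (align_jumps == 0)          /* not overridden by -falign-jumps */
      align_jumps = processor_target_table[ix86_tune].align_jump;
    if (align_functions == 0)      /* not overridden by -falign-functions */
      align_functions = processor_target_table[ix86_tune].align_func;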
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+void
+override_options (void)
+{
+ int i;
+ int ix86_tune_defaulted = 0;
+ /* APPLE LOCAL mainline */
+ int ix86_arch_specified = 0;
+
+ /* Comes from final.c -- no real reason to change it. */
+#define MAX_CODE_ALIGN 16
+
+ /* APPLE LOCAL begin 4760857 optimization pragmas. */
+ /* processor_target_table moved to file scope. */
+ /* APPLE LOCAL end 4760857 optimization pragmas. */
+
+ static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
+ static struct pta
+ {
+ const char *const name; /* processor name or nickname. */
+ const enum processor_type processor;
+ const enum pta_flags
+ {
+ PTA_SSE = 1,
+ PTA_SSE2 = 2,
+ PTA_SSE3 = 4,
+ PTA_MMX = 8,
+ PTA_PREFETCH_SSE = 16,
+ PTA_3DNOW = 32,
+ PTA_3DNOW_A = 64,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* APPLE LOCAL begin mainline */
+ PTA_64BIT = 128,
+ PTA_SSSE3 = 256,
+ /* APPLE LOCAL end mainline */
+ PTA_CX16 = 1 << 9,
+ PTA_POPCNT = 1 << 10,
+ PTA_ABM = 1 << 11,
+ PTA_SSE4A = 1 << 12,
+ PTA_NO_SAHF = 1 << 13,
+ PTA_SSE4_1 = 1 << 14,
+ PTA_SSE4_2 = 1 << 15
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ } flags;
+ }
+ const processor_alias_table[] =
+ {
+ {"i386", PROCESSOR_I386, 0},
+ {"i486", PROCESSOR_I486, 0},
+ {"i586", PROCESSOR_PENTIUM, 0},
+ {"pentium", PROCESSOR_PENTIUM, 0},
+ {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
+ {"winchip-c6", PROCESSOR_I486, PTA_MMX},
+ {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
+ {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
+ {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
+ {"i686", PROCESSOR_PENTIUMPRO, 0},
+ {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
+ {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
+ {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
+ {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
+ {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
+ {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ /* APPLE LOCAL begin mainline */
+ {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_64BIT | PTA_MMX
+ | PTA_PREFETCH_SSE},
+ /* APPLE LOCAL end mainline */
+ {"k6", PROCESSOR_K6, PTA_MMX},
+ {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
+ {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
+ {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A},
+ {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
+ | PTA_3DNOW | PTA_3DNOW_A},
+ {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A | PTA_SSE},
+ {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
+ | PTA_SSE | PTA_SSE2 },
+ {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
+ {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
+ };
+
+ int const pta_size = ARRAY_SIZE (processor_alias_table);
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
+ SUBSUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ /* -fPIC is the default for x86_64. */
+ if (TARGET_MACHO && TARGET_64BIT)
+ flag_pic = 2;
+
+ /* Set the default values for switches whose default depends on TARGET_64BIT
+ in case they weren't overwritten by command line options. */
+ if (TARGET_64BIT)
+ {
+ /* Mach-O doesn't support omitting the frame pointer for now. */
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 1;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = 0;
+ }
+ else
+ {
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = 0;
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 0;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
+ }
+
+ /* Need to check -mtune=generic first. */
+ if (ix86_tune_string)
+ {
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "i686")
+ /* As special support for cross compilers we read -mtune=native
+ as -mtune=generic. With native compilers we won't see the
+ -mtune=native, as it was changed by the driver. */
+ || !strcmp (ix86_tune_string, "native"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ else if (!strncmp (ix86_tune_string, "generic", 7))
+ error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+ }
+ else
+ {
+ if (ix86_arch_string)
+ ix86_tune_string = ix86_arch_string;
+ if (!ix86_tune_string)
+ {
+ ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
+ ix86_tune_defaulted = 1;
+ }
+
+ /* ix86_tune_string is set to ix86_arch_string or defaulted. We
+ need to use a sensible tune option. */
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "x86-64")
+ || !strcmp (ix86_tune_string, "i686"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ }
+ if (!strcmp (ix86_tune_string, "x86-64"))
+ warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
+ "-mtune=generic instead as appropriate.");
+
+ if (!ix86_arch_string)
+ ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
+ /* APPLE LOCAL begin mainline */
+ else
+ ix86_arch_specified = 1;
+ /* APPLE LOCAL end mainline */
+ if (!strcmp (ix86_arch_string, "generic"))
+ error ("generic CPU can be used only for -mtune= switch");
+ if (!strncmp (ix86_arch_string, "generic", 7))
+ error ("bad value (%s) for -march= switch", ix86_arch_string);
+
+ if (ix86_cmodel_string != 0)
+ {
+ if (!strcmp (ix86_cmodel_string, "small"))
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ else if (!strcmp (ix86_cmodel_string, "medium"))
+ ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
+ else if (flag_pic)
+ sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
+ else if (!strcmp (ix86_cmodel_string, "32"))
+ ix86_cmodel = CM_32;
+ else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
+ ix86_cmodel = CM_KERNEL;
+ else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
+ ix86_cmodel = CM_LARGE;
+ else
+ error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
+ }
+ else
+ {
+ ix86_cmodel = CM_32;
+ if (TARGET_64BIT)
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ }
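+
+  /* For illustration of the code model selection above: a 64-bit compile
+     with -fPIC and no -mcmodel= option ends up with CM_SMALL_PIC, while
+     "-mcmodel=medium" without -fPIC selects CM_MEDIUM.  */
+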
+ if (ix86_asm_string != 0)
+ {
+ if (! TARGET_MACHO
+ && !strcmp (ix86_asm_string, "intel"))
+ ix86_asm_dialect = ASM_INTEL;
+ else if (!strcmp (ix86_asm_string, "att"))
+ ix86_asm_dialect = ASM_ATT;
+ else
+ error ("bad value (%s) for -masm= switch", ix86_asm_string);
+ }
+ if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
+ error ("code model %qs not supported in the %s bit mode",
+ ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
+ if (ix86_cmodel == CM_LARGE)
+ sorry ("code model %<large%> not supported yet");
+ if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
+ sorry ("%i-bit mode not compiled in",
+ (target_flags & MASK_64BIT) ? 64 : 32);
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
+ {
+ ix86_arch = processor_alias_table[i].processor;
+ /* Default cpu tuning to the architecture. */
+ ix86_tune = ix86_arch;
+ if (processor_alias_table[i].flags & PTA_MMX
+ && !(target_flags_explicit & MASK_MMX))
+ target_flags |= MASK_MMX;
+ if (processor_alias_table[i].flags & PTA_3DNOW
+ && !(target_flags_explicit & MASK_3DNOW))
+ target_flags |= MASK_3DNOW;
+ if (processor_alias_table[i].flags & PTA_3DNOW_A
+ && !(target_flags_explicit & MASK_3DNOW_A))
+ target_flags |= MASK_3DNOW_A;
+ if (processor_alias_table[i].flags & PTA_SSE
+ && !(target_flags_explicit & MASK_SSE))
+ target_flags |= MASK_SSE;
+ if (processor_alias_table[i].flags & PTA_SSE2
+ && !(target_flags_explicit & MASK_SSE2))
+ target_flags |= MASK_SSE2;
+ if (processor_alias_table[i].flags & PTA_SSE3
+ && !(target_flags_explicit & MASK_SSE3))
+ target_flags |= MASK_SSE3;
+ /* APPLE LOCAL begin mainline */
+ if (processor_alias_table[i].flags & PTA_SSSE3
+ && !(target_flags_explicit & MASK_SSSE3))
+ target_flags |= MASK_SSSE3;
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ if (processor_alias_table[i].flags & PTA_SSE4_1
+ && !(target_flags_explicit & MASK_SSE4_1))
+ target_flags |= MASK_SSE4_1;
+ if (processor_alias_table[i].flags & PTA_SSE4_2
+ && !(target_flags_explicit & MASK_SSE4_2))
+ target_flags |= MASK_SSE4_2;
+ if (processor_alias_table[i].flags & PTA_SSE4A
+ && !(target_flags_explicit & MASK_SSE4A))
+ target_flags |= MASK_SSE4A;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
+ x86_prefetch_sse = true;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+ break;
+ }
+
+ if (i == pta_size)
+ error ("bad value (%s) for -march= switch", ix86_arch_string);
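+
+  /* For illustration: with the table above, "-march=k8" selects
+     PROCESSOR_K8 and, unless the user explicitly overrode them on the
+     command line, turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A, MASK_SSE
+     and MASK_SSE2, and sets x86_prefetch_sse.  */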
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
+ {
+ ix86_tune = processor_alias_table[i].processor;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ {
+ if (ix86_tune_defaulted)
+ {
+ ix86_tune_string = "x86-64";
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string,
+ processor_alias_table[i].name))
+ break;
+ ix86_tune = processor_alias_table[i].processor;
+ }
+ else
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+ }
+ /* Intel CPUs have always interpreted SSE prefetch instructions as
+ NOPs; so, we can enable SSE prefetch instructions even when
+ -mtune (rather than -march) points us to a processor that has them.
+ However, the VIA C3 gives a SIGILL, so we only do that for i686 and
+ higher processors. */
+ if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
+ x86_prefetch_sse = true;
+ break;
+ }
+ if (i == pta_size)
+ error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+
+ if (optimize_size)
+ ix86_cost = &size_cost;
+ else
+ ix86_cost = processor_target_table[ix86_tune].cost;
+ target_flags |= processor_target_table[ix86_tune].target_enable;
+ target_flags &= ~processor_target_table[ix86_tune].target_disable;
+
+ /* Arrange to set up i386_stack_locals for all functions. */
+ init_machine_status = ix86_init_machine_status;
+
+ /* Validate -mregparm= value. */
+ if (ix86_regparm_string)
+ {
+ i = atoi (ix86_regparm_string);
+ if (i < 0 || i > REGPARM_MAX)
+ error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
+ else
+ ix86_regparm = i;
+ }
+ else
+ if (TARGET_64BIT)
+ ix86_regparm = REGPARM_MAX;
+
+ /* If the user has provided any of the -malign-* options,
+ warn and use that value only if -falign-* is not set.
+ Remove this code in GCC 3.2 or later. */
+ if (ix86_align_loops_string)
+ {
+ warning (0, "-malign-loops is obsolete, use -falign-loops");
+ if (align_loops == 0)
+ {
+ i = atoi (ix86_align_loops_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_loops = 1 << i;
+ }
+ }
+
+ if (ix86_align_jumps_string)
+ {
+ warning (0, "-malign-jumps is obsolete, use -falign-jumps");
+ if (align_jumps == 0)
+ {
+ i = atoi (ix86_align_jumps_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_jumps = 1 << i;
+ }
+ }
+
+ if (ix86_align_funcs_string)
+ {
+ warning (0, "-malign-functions is obsolete, use -falign-functions");
+ if (align_functions == 0)
+ {
+ i = atoi (ix86_align_funcs_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_functions = 1 << i;
+ }
+ }
+
+ /* Default align_* from the processor table. */
+ if (align_loops == 0)
+ {
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ }
+ if (align_jumps == 0)
+ {
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ }
+ if (align_functions == 0)
+ {
+ align_functions = processor_target_table[ix86_tune].align_func;
+ }
+
+ /* Validate -mbranch-cost= value, or provide default. */
+ ix86_branch_cost = ix86_cost->branch_cost;
+ if (ix86_branch_cost_string)
+ {
+ i = atoi (ix86_branch_cost_string);
+ if (i < 0 || i > 5)
+ error ("-mbranch-cost=%d is not between 0 and 5", i);
+ else
+ ix86_branch_cost = i;
+ }
+ if (ix86_section_threshold_string)
+ {
+ i = atoi (ix86_section_threshold_string);
+ if (i < 0)
+ error ("-mlarge-data-threshold=%d is negative", i);
+ else
+ ix86_section_threshold = i;
+ }
+
+ if (ix86_tls_dialect_string)
+ {
+ if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU;
+ else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU2;
+ else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
+ ix86_tls_dialect = TLS_DIALECT_SUN;
+ else
+ error ("bad value (%s) for -mtls-dialect= switch",
+ ix86_tls_dialect_string);
+ }
+ /* APPLE LOCAL begin mainline */
+ if (TARGET_64BIT)
+ {
+ if (TARGET_ALIGN_DOUBLE)
+ error ("-malign-double makes no sense in the 64bit mode");
+ if (TARGET_RTD)
+ error ("-mrtd calling convention not supported in the 64bit mode");
+ /* APPLE LOCAL begin radar 4877693 */
+ if (ix86_force_align_arg_pointer)
+ error ("-mstackrealign not supported in the 64bit mode");
+ /* APPLE LOCAL end radar 4877693 */
+
+ /* Enable by default the SSE and MMX builtins. Do allow the user to
+ explicitly disable any of these. In particular, disabling SSE and
+ MMX for kernel code is extremely useful. */
+ if (!ix86_arch_specified)
+ target_flags
+ |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE
+ | TARGET_SUBTARGET64_DEFAULT) & ~target_flags_explicit);
+ /* APPLE LOCAL begin mainline candidate */
+ /* Disable the red zone for kernel compilation.
+ ??? Why aren't we using -mcmodel=kernel? */
+ if (TARGET_MACHO
+ && (flag_mkernel || flag_apple_kext))
+ target_flags |= MASK_NO_RED_ZONE;
+ /* APPLE LOCAL end mainline candidate */
+ }
+ else
+ {
+ if (!ix86_arch_specified)
+ target_flags |= (TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit);
+
+ /* The i386 ABI does not specify a red zone. It still makes sense to use
+ one when the programmer takes care to keep the stack from being destroyed. */
+ if (!(target_flags_explicit & MASK_NO_RED_ZONE))
+ target_flags |= MASK_NO_RED_ZONE;
+ }
+
+ /* APPLE LOCAL end mainline */
+ /* Keep nonleaf frame pointers. */
+ if (flag_omit_frame_pointer)
+ target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+ else if (TARGET_OMIT_LEAF_FRAME_POINTER)
+ flag_omit_frame_pointer = 1;
+
+ /* If we're doing fast math, we don't care about comparison order
+ wrt NaNs. This lets us use a shorter comparison sequence. */
+ if (flag_finite_math_only)
+ target_flags &= ~MASK_IEEE_FP;
+
+ /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
+ since the insns won't need emulation. */
+ if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
+ target_flags &= ~MASK_NO_FANCY_MATH_387;
+
+ /* Likewise, if the target doesn't have a 387, or we've specified
+ software floating point, don't use 387 inline intrinsics. */
+ if (!TARGET_80387)
+ target_flags |= MASK_NO_FANCY_MATH_387;
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Turn on SSE4.1 builtins for -msse4.2. */
+ if (TARGET_SSE4_2)
+ target_flags |= MASK_SSE4_1;
+ /* Turn on SSSE3 builtins for -msse4.1. */
+ if (TARGET_SSE4_1)
+ target_flags |= MASK_SSSE3;
+ /* Turn on SSE3 builtins for -msse4a. */
+ if (TARGET_SSE4A)
+ target_flags |= MASK_SSE3;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* APPLE LOCAL begin mainline */
+ /* Turn on SSE3 builtins for -mssse3. */
+ if (TARGET_SSSE3)
+ target_flags |= MASK_SSE3;
+ /* APPLE LOCAL end mainline */
+ /* Turn on SSE2 builtins for -msse3. */
+ if (TARGET_SSE3)
+ target_flags |= MASK_SSE2;
+
+ /* Turn on SSE builtins for -msse2. */
+ if (TARGET_SSE2)
+ target_flags |= MASK_SSE;
+
+ /* Turn on MMX builtins for -msse. */
+ if (TARGET_SSE)
+ {
+ target_flags |= MASK_MMX & ~target_flags_explicit;
+ x86_prefetch_sse = true;
+ }
+
+ /* Turn on MMX builtins for 3Dnow. */
+ if (TARGET_3DNOW)
+ target_flags |= MASK_MMX;
+
+ /* APPLE LOCAL mainline */
+ /* Moved this up... */
+ /* Validate -mpreferred-stack-boundary= value, or provide default.
+ The default of 128 bits is for Pentium III's SSE __m128. We can't
+ change it because of optimize_size. Otherwise, we can't mix object
+ files compiled with -Os and -On. */
+ ix86_preferred_stack_boundary = 128;
+ if (ix86_preferred_stack_boundary_string)
+ {
+ i = atoi (ix86_preferred_stack_boundary_string);
+ if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
+ error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
+ TARGET_64BIT ? 4 : 2);
+ else
+ ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ }
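+
+  /* For illustration: "-mpreferred-stack-boundary=4" gives (1 << 4) * 8
+     = 128 bits, i.e. the 16-byte alignment needed for SSE __m128, which
+     matches the default set just above.  */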
+
+ /* Accept -msseregparm only if at least SSE support is enabled. */
+ if (TARGET_SSEREGPARM
+ && ! TARGET_SSE)
+ error ("-msseregparm used without SSE enabled");
+
+ ix86_fpmath = TARGET_FPMATH_DEFAULT;
+
+ if (ix86_fpmath_string != 0)
+ {
+ if (! strcmp (ix86_fpmath_string, "387"))
+ ix86_fpmath = FPMATH_387;
+ else if (! strcmp (ix86_fpmath_string, "sse"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else if (! strcmp (ix86_fpmath_string, "387,sse")
+ || ! strcmp (ix86_fpmath_string, "sse,387"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else if (!TARGET_80387)
+ {
+ warning (0, "387 instruction set disabled, using SSE arithmetics");
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE | FPMATH_387;
+ }
+ else
+ error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
+ }
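+
+  /* For illustration: "-msse2 -mfpmath=sse" yields FPMATH_SSE here, while
+     "-mfpmath=sse" without SSE enabled falls back to FPMATH_387 with a
+     warning, as handled above.  */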
+
+ /* If the i387 is disabled, then do not return values in it. */
+ if (!TARGET_80387)
+ target_flags &= ~MASK_FLOAT_RETURNS;
+
+ if ((x86_accumulate_outgoing_args & TUNEMASK)
+ && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ && !optimize_size)
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+
+ /* ??? Unwind info is not correct around the CFG unless either a frame
+ pointer is present or M_A_O_A is set. Fixing this requires rewriting
+ unwind info generation to be aware of the CFG and propagating states
+ around edges. */
+ if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+ || flag_exceptions || flag_non_call_exceptions)
+ && flag_omit_frame_pointer
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "unwind tables currently require either a frame pointer "
+ "or -maccumulate-outgoing-args for correctness");
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
+ /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
+ {
+ char *p;
+ ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
+ p = strchr (internal_label_prefix, 'X');
+ internal_label_prefix_len = p - internal_label_prefix;
+ *p = '\0';
+ }
+
+ /* When no scheduling description is available, disable the scheduler pass
+ so it won't slow down compilation and make x87 code slower. */
+ /* APPLE LOCAL 5591571 */
+ if (1 || !TARGET_SCHEDULE)
+ flag_schedule_insns_after_reload = flag_schedule_insns = 0;
+
+ /* APPLE LOCAL begin dynamic-no-pic */
+#if TARGET_MACHO
+ if (MACHO_DYNAMIC_NO_PIC_P)
+ {
+ if (flag_pic)
+ warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC");
+ flag_pic = 0;
+ }
+ else
+#endif
+ if (flag_pic == 1)
+ {
+ /* Darwin's -fpic is -fPIC. */
+ flag_pic = 2;
+ }
+ /* APPLE LOCAL end dynamic-no-pic */
+ /* APPLE LOCAL begin 4812082 -fast */
+ /* These flags were the best on the software H264 codec, and have therefore
+ been lumped into -fast per 4812082. They have not been evaluated on
+ any other code, except that -fno-tree-pre is known to lose on the
+ hardware accelerated version of the codec. */
+ if (flag_fast || flag_fastf || flag_fastcp)
+ {
+ flag_omit_frame_pointer = 1;
+ flag_strict_aliasing = 1;
+ flag_tree_pre = 0;
+ target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ }
+ /* APPLE LOCAL end 4812082 -fast */
+}
+
+/* Switch to the appropriate section for output of DECL.
+ DECL is either a `VAR_DECL' node or a constant of some sort.
+ RELOC indicates whether forming the initial value of DECL requires
+ link-time relocations. */
+
+static section *
+x86_64_elf_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *sname = NULL;
+ unsigned int flags = SECTION_WRITE;
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ sname = ".ldata";
+ break;
+ case SECCAT_DATA_REL:
+ sname = ".ldata.rel";
+ break;
+ case SECCAT_DATA_REL_LOCAL:
+ sname = ".ldata.rel.local";
+ break;
+ case SECCAT_DATA_REL_RO:
+ sname = ".ldata.rel.ro";
+ break;
+ case SECCAT_DATA_REL_RO_LOCAL:
+ sname = ".ldata.rel.ro.local";
+ break;
+ case SECCAT_BSS:
+ sname = ".lbss";
+ flags |= SECTION_BSS;
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ sname = ".lrodata";
+ flags = 0;
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+ /* We don't split these for the medium model. Place them into
+ default sections and hope for the best. */
+ break;
+ }
+ if (sname)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section (sname, flags, NULL);
+ return get_named_section (decl, sname, reloc);
+ }
+ }
+ return default_elf_select_section (decl, reloc, align);
+}
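+
+/* For illustration (hypothetical declaration): under -mcmodel=medium, an
+   initialized writable object larger than the -mlarge-data-threshold, e.g.
+     static int big_table[1 << 20] = { 1 };
+   is categorized as SECCAT_DATA and placed in ".ldata" by the function
+   above rather than the default ".data" section.  */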
+
+/* Build up a unique section name, expressed as a
+ STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
+ RELOC indicates whether the initial value of EXP requires
+ link-time relocations. */
+
+static void
+x86_64_elf_unique_section (tree decl, int reloc)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *prefix = NULL;
+ /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
+ bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
+
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ case SECCAT_DATA_REL:
+ case SECCAT_DATA_REL_LOCAL:
+ case SECCAT_DATA_REL_RO:
+ case SECCAT_DATA_REL_RO_LOCAL:
+ prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
+ break;
+ case SECCAT_BSS:
+ prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+ /* We don't split these for the medium model. Place them into
+ default sections and hope for the best. */
+ break;
+ }
+ if (prefix)
+ {
+ const char *name;
+ size_t nlen, plen;
+ char *string;
+ plen = strlen (prefix);
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = targetm.strip_name_encoding (name);
+ nlen = strlen (name);
+
+ string = alloca (nlen + plen + 1);
+ memcpy (string, prefix, plen);
+ memcpy (string + plen, name, nlen + 1);
+
+ DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
+ return;
+ }
+ }
+ default_unique_section (decl, reloc);
+}
+
+#ifdef COMMON_ASM_OP
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object.
+
+ For medium model x86-64 we need to use the .largecomm directive for
+ large objects. */
+void
+x86_elf_aligned_common (FILE *file,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ fprintf (file, ".largecomm\t");
+ else
+ fprintf (file, "%s", COMMON_ASM_OP);
+ assemble_name (file, name);
+ fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
+ size, align / BITS_PER_UNIT);
+}
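+
+/* For illustration: for a hypothetical common symbol "big_common" of
+   3000000 bytes requesting 256-bit alignment, under -mcmodel=medium with
+   the size above ix86_section_threshold, the function above emits
+     .largecomm	big_common,3000000,32
+   instead of the usual .comm directive.  */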
+
+/* Utility function for targets to use in implementing
+ ASM_OUTPUT_ALIGNED_BSS. */
+
+void
+x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ switch_to_section (get_named_section (decl, ".lbss", 0));
+ else
+ switch_to_section (bss_section);
+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+#ifdef ASM_DECLARE_OBJECT_NAME
+ last_assemble_variable_decl = decl;
+ ASM_DECLARE_OBJECT_NAME (file, name, decl);
+#else
+ /* Standard thing is just output label for the object. */
+ ASM_OUTPUT_LABEL (file, name);
+#endif /* ASM_DECLARE_OBJECT_NAME */
+ ASM_OUTPUT_SKIP (file, size ? size : 1);
+}
+#endif
+
+void
+optimization_options (int level, int size ATTRIBUTE_UNUSED)
+{
+ /* APPLE LOCAL begin disable strict aliasing; breaks too much existing code. */
+#if TARGET_MACHO
+ flag_strict_aliasing = 0;
+#endif
+ /* APPLE LOCAL end disable strict aliasing; breaks too much existing code. */
+ /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
+ make the problem with not enough registers even worse. */
+#ifdef INSN_SCHEDULING
+ if (level > 1)
+ flag_schedule_insns = 0;
+#endif
+
+ /* APPLE LOCAL begin pragma fenv */
+ /* Trapping math is not needed by many users, and is expensive.
+ C99 permits us to default it off and we do that. It is
+ turned on when <fenv.h> is included (see darwin_pragma_fenv
+ in darwin-c.c). */
+ flag_trapping_math = 0;
+ /* APPLE LOCAL end pragma fenv */
+
+ if (TARGET_MACHO)
+ /* The Darwin libraries never set errno, so we might as well
+ avoid calling them when that's the only reason we would. */
+ flag_errno_math = 0;
+
+ /* The default values of these switches depend on TARGET_64BIT,
+ which is not known at this moment. Mark these values with 2 and
+ let the user override them. In case there is no command line option
+ specifying them, we will set the defaults in override_options. */
+ if (optimize >= 1)
+ flag_omit_frame_pointer = 2;
+ flag_pcc_struct_return = 2;
+ flag_asynchronous_unwind_tables = 2;
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+ /* APPLE LOCAL begin 4200243 */
+ if (getenv ("RC_FORCE_SSE3"))
+ target_flags |= MASK_SSE3;
+}
+/* APPLE LOCAL end 4200243 */
+
+/* APPLE LOCAL begin optimization pragmas 3124235/3420242 */
+/* Version of the above for use from #pragma optimization_level. Only
+ per-function flags are reset. */
+#if TARGET_MACHO
+void
+reset_optimization_options (int level, int size)
+{
+ /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
+ make the problem with not enough registers even worse. */
+#ifdef INSN_SCHEDULING
+ if (level > 1)
+ flag_schedule_insns = 0;
+#endif
+
+ /* APPLE LOCAL begin pragma fenv */
+ /* Trapping math is not needed by many users, and is expensive.
+ C99 permits us to default it off and we do that. It is
+ turned on when <fenv.h> is included (see darwin_pragma_fenv
+ in darwin-c.c). */
+ flag_trapping_math = 0;
+ /* APPLE LOCAL end pragma fenv */
+
+ /* The default values of these switches depend on TARGET_64BIT
+ which was set earlier and not reset. */
+ if (optimize >= 1)
+ {
+ if (TARGET_64BIT)
+ flag_omit_frame_pointer = 1;
+ else
+ flag_omit_frame_pointer = 0;
+ }
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+ /* APPLE LOCAL begin 4760857 */
+ if (size)
+ ix86_cost = &size_cost;
+ else
+ ix86_cost = processor_target_table[ix86_tune].cost;
+
+ /* Default align_* from the processor table. */
+ if (align_loops == 0)
+ {
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ }
+ if (align_jumps == 0)
+ {
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ }
+ /* APPLE LOCAL end 4760857 */
+}
+#endif
+/* APPLE LOCAL end optimization pragmas 3124235/3420242 */
+
+/* Table of valid machine attributes. */
+const struct attribute_spec ix86_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Stdcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Fastcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Cdecl attribute says the callee is a normal C declaration */
+ { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Regparm attribute specifies how many integer arguments are to be
+ passed in registers. */
+ { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
+ /* APPLE LOCAL begin regparmandstackparm */
+ /* regparmandstackparm means two entry points; a traditional stack-based
+ one, and another, with a mangled name, that employs regparm and
+ sseregparm. */
+ { "regparmandstackparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ { "regparmandstackparmee", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* APPLE LOCAL end regparmandstackparm */
+ /* Sseregparm attribute says we are using x86_64 calling conventions
+ for FP arguments. */
+ { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* force_align_arg_pointer says this function realigns the stack at entry. */
+ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
+ false, true, true, ix86_handle_cconv_attribute },
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
+#endif
+ { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+ { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+ SUBTARGET_ATTRIBUTE_TABLE,
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Decide whether we can make a sibling call to a function. DECL is the
+ declaration of the function being targeted by the call and EXP is the
+ CALL_EXPR representing the call. */
+
+static bool
+ix86_function_ok_for_sibcall (tree decl, tree exp)
+{
+ tree func;
+ rtx a, b;
+
+ /* APPLE LOCAL begin indirect sibcall 4087330 */
+ /* If we are generating position-independent code, we cannot sibcall
+ optimize any indirect call, or a direct call to a global function,
+ as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
+ if (!TARGET_MACHO
+ && !TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
+ return false;
+ /* APPLE LOCAL end indirect sibcall 4087330 */
+
+ if (decl)
+ func = decl;
+ else
+ {
+ func = TREE_TYPE (TREE_OPERAND (exp, 0));
+ if (POINTER_TYPE_P (func))
+ func = TREE_TYPE (func);
+ }
+
+ /* Check that the return value locations are the same. For example,
+ if we are returning floats on the 80387 register stack, we cannot
+ make a sibcall from a function that doesn't return a float to a
+ function that does or, conversely, from a function that does return
+ a float to a function that doesn't; the necessary stack adjustment
+ would not be executed. This is also the place we notice
+ differences in the return value ABI. Note that it is ok for one
+ of the functions to have void return type as long as the return
+ value of the other is passed in a register. */
+ a = ix86_function_value (TREE_TYPE (exp), func, false);
+ b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (STACK_REG_P (a) || STACK_REG_P (b))
+ {
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+ else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ ;
+ else if (!rtx_equal_p (a, b))
+ return false;
+
+ /* If this call is indirect, we'll need to be able to use a call-clobbered
+ register for the address of the target function. Make sure that all
+ such registers are not used for passing parameters. */
+ if (!decl && !TARGET_64BIT)
+ {
+ tree type;
+
+ /* We're looking at the CALL_EXPR, we need the type of the function. */
+ type = TREE_OPERAND (exp, 0); /* pointer expression */
+ type = TREE_TYPE (type); /* pointer type */
+ type = TREE_TYPE (type); /* function type */
+
+ if (ix86_function_regparm (type, NULL) >= 3)
+ {
+ /* ??? Need to count the actual number of registers to be used,
+ not the possible number of registers. Fix later. */
+ return false;
+ }
+ }
+
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ /* Dllimport'd functions are also called indirectly. */
+ if (decl && DECL_DLLIMPORT_P (decl)
+ && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
+ return false;
+#endif
+
+ /* If we force-aligned the stack, then sibcalling would unalign the
+ stack, which may break the called function. */
+ if (cfun->machine->force_align_arg_pointer)
+ return false;
+
+ /* Otherwise okay. That also includes certain types of indirect calls. */
+ return true;
+}
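+
+/* For illustration: on a 32-bit target, a tail call through a function
+   pointer whose type carries regparm (3) is rejected by the check above,
+   since all call-clobbered integer registers could be taken by arguments,
+   leaving none to hold the target address.  */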
+
+/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
+ calling convention attributes;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+ix86_handle_cconv_attribute (tree *node, tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qs attribute only applies to functions",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* Can combine regparm with all attributes but fastcall. */
+ if (is_attribute_p ("regparm", name))
+ {
+ tree cst;
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (!TARGET_64BIT
+ && (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node))
+ || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (*node))))
+ {
+ error ("regparmandstackparm and regparm attributes are not compatible");
+ }
+ /* APPLE LOCAL end regparmandstackparm */
+
+ cst = TREE_VALUE (args);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ {
+ warning (OPT_Wattributes,
+ "%qs attribute requires an integer constant argument",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+ else if (compare_tree_int (cst, REGPARM_MAX) > 0)
+ {
+ warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
+ IDENTIFIER_POINTER (name), REGPARM_MAX);
+ *no_add_attrs = true;
+ }
+
+ if (!TARGET_64BIT
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (*node))
+ && compare_tree_int (cst, REGPARM_MAX-1))
+ {
+ error ("%s functions limited to %d register parameters",
+ ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
+ }
+
+ return NULL_TREE;
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Turn on popcnt instruction for -msse4.2 or -mabm. */
+ if (TARGET_SSE4_2)
+ x86_popcnt = true;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ if (TARGET_64BIT)
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* Can combine fastcall with stdcall (redundant) and sseregparm. */
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (is_attribute_p ("fastcall", name)
+ || is_attribute_p ("regparmandstackparm", name))
+ /* APPLE LOCAL end regparmandstackparm */
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and stdcall attributes are not compatible");
+ }
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+ }
+
+ /* Can combine stdcall with fastcall (redundant), regparm and
+ sseregparm. */
+ else if (is_attribute_p ("stdcall", name))
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))
+ || lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node)))
+ /* APPLE LOCAL end regparmandstackparm */
+ {
+ error ("stdcall and fastcall attributes are not compatible");
+ }
+ }
+
+ /* Can combine cdecl with regparm and sseregparm. */
+ else if (is_attribute_p ("cdecl", name))
+ {
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ }
+
+ /* Can combine sseregparm with all attributes. */
+
+ return NULL_TREE;
+}
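+
+/* For illustration (hypothetical 32-bit declarations): the handler above
+   accepts
+     int __attribute__ ((regparm (3))) f (int, int, int);
+   but diagnoses incompatible combinations such as
+     int __attribute__ ((fastcall, cdecl)) g (int);
+   with "fastcall and cdecl attributes are not compatible".  */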
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+
+static int
+ix86_comp_type_attributes (tree type1, tree type2)
+{
+ /* Check for mismatch of non-default calling convention. */
+ const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
+
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched fastcall/regparm types. */
+ if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
+ || (ix86_function_regparm (type1, NULL)
+ != ix86_function_regparm (type2, NULL)))
+ return 0;
+
+ /* Check for mismatched sseregparm types. */
+ if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ /* Check for mismatched return types (cdecl vs stdcall). */
+ if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ return 1;
+}
+
+/* Return the regparm value for a function with the indicated TYPE and DECL.
+ DECL may be NULL when calling function indirectly
+ or considering a libcall. */
+
+static int
+ix86_function_regparm (tree type, tree decl)
+{
+ tree attr;
+ /* APPLE LOCAL begin MERGE FIXME audit to ensure that it's ok
+
+ We had local_regparm but the FSF didn't, and there didn't seem to
+ be a merge conflict, so something is strange. These seem to be just
+ normal Apple local changes. I asked Stuart about them in email. */
+ int regparm = ix86_regparm;
+ bool user_convention = false;
+
+ if (!TARGET_64BIT)
+ {
+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+ user_convention = true;
+ }
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))
+ || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
+ /* APPLE LOCAL end regparmandstackparm */
+ {
+ regparm = 2;
+ user_convention = true;
+ }
+
+ /* Use register calling convention for local functions when possible. */
+ if (!TARGET_64BIT && !user_convention && decl
+ && flag_unit_at_a_time && !profile_flag)
+ {
+ struct cgraph_local_info *i = cgraph_local_info (decl);
+ if (i && i->local)
+ {
+ int local_regparm, globals = 0, regno;
+
+ /* Make sure no regparm register is taken by a global register
+ variable. */
+ for (local_regparm = 0; local_regparm < 3; local_regparm++)
+ if (global_regs[local_regparm])
+ break;
+ /* We can't use regparm(3) for nested functions as these use
+ static chain pointer in third argument. */
+ if (local_regparm == 3
+ /* APPLE LOCAL begin mainline */
+ && (decl_function_context (decl)
+ || ix86_force_align_arg_pointer)
+ /* APPLE LOCAL end mainline */
+ && !DECL_NO_STATIC_CHAIN (decl))
+ local_regparm = 2;
+ /* If the function realigns its stack pointer, the
+ prologue will clobber %ecx. If we've already
+ generated code for the callee, the callee
+ DECL_STRUCT_FUNCTION is gone, so we fall back to
+ scanning the attributes for the self-realigning
+ property. */
+ if ((DECL_STRUCT_FUNCTION (decl)
+ /* MERGE FIXME was in our version, but not in FSF 2006-05-23 */
+ && DECL_STRUCT_FUNCTION (decl)->machine
+ && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
+ || (!DECL_STRUCT_FUNCTION (decl)
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
+ local_regparm = 2;
+ /* Each global register variable increases register pressure,
+ so the more global register variables there are, the less useful
+ the regparm optimization becomes, unless the user requests it explicitly. */
+ for (regno = 0; regno < 6; regno++)
+ if (global_regs[regno])
+ globals++;
+ local_regparm
+ = globals < local_regparm ? local_regparm - globals : 0;
+
+ if (local_regparm > regparm)
+ regparm = local_regparm;
+ }
+ }
+ }
+ /* APPLE LOCAL end MERGE FIXME audit to ensure that it's ok */
+ return regparm;
+}
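+
+/* For illustration: for a hypothetical 32-bit declaration
+     void __attribute__ ((regparm (2))) h (int a, int b);
+   the function above returns 2, while a local (non-exported) function
+   compiled with -funit-at-a-time may be bumped up to regparm 3
+   automatically by the block above.  */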
+
+/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
+ DFmode (2) arguments in SSE registers for a function with the
+ indicated TYPE and DECL. DECL may be NULL when calling function
+ indirectly or considering a libcall. Otherwise return 0. */
+
+static int
+ix86_function_sseregparm (tree type, tree decl)
+{
+ /* Use SSE registers to pass SFmode and DFmode arguments if requested
+ by the sseregparm attribute. */
+ if (TARGET_SSEREGPARM
+ || (type
+ && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
+ {
+ if (!TARGET_SSE)
+ {
+ if (decl)
+ error ("Calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("Calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ return 0;
+ }
+
+ return 2;
+ }
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (type && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
+ return 2;
+ /* APPLE LOCAL end regparmandstackparm */
+
+ /* For local functions, pass up to SSE_REGPARM_MAX SFmode
+ (and DFmode for SSE2) arguments in SSE registers,
+ even for 32-bit targets. */
+ if (!TARGET_64BIT && decl
+ && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
+ {
+ struct cgraph_local_info *i = cgraph_local_info (decl);
+ if (i && i->local)
+ return TARGET_SSE2 ? 2 : 1;
+ }
+
+ return 0;
+}
+
+/* Return true if EAX is live at the start of the function. Used by
+ ix86_expand_prologue to determine if we need special help before
+ calling allocate_stack_worker. */
+
+static bool
+ix86_eax_live_at_start_p (void)
+{
+ /* Cheat. Don't bother working forward from ix86_function_regparm
+ to the function type to whether an actual argument is located in
+ eax. Instead just look at cfg info, which is still close enough
+ to correct at this point. This gives false positives for broken
+ functions that might use uninitialized data that happens to be
+ allocated in eax, but who cares? */
+ return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
+}
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the 80386, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per module basis. */
+
+int
+ix86_return_pops_args (tree fundecl, tree funtype, int size)
+{
+ int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
+
+ /* Cdecl functions override -mrtd, and never pop the stack. */
+ if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
+
+ /* Stdcall and fastcall functions will pop the stack if not
+ variable args. */
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
+ rtd = 1;
+
+ if (rtd
+ && (TYPE_ARG_TYPES (funtype) == NULL_TREE
+ || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
+ == void_type_node)))
+ return size;
+ }
+
+ /* Lose any fake structure return argument if it is passed on the stack. */
+ if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
+ && !TARGET_64BIT
+ && !KEEP_AGGREGATE_RETURN_POINTER)
+ {
+ int nregs = ix86_function_regparm (funtype, fundecl);
+
+ if (!nregs)
+ return GET_MODE_SIZE (Pmode);
+ }
+
+ return 0;
+}
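+
+/* For illustration: for a hypothetical 32-bit stdcall function
+     void __attribute__ ((stdcall)) k (int a, int b);
+   the function above returns 8, so the callee pops its two arguments,
+   whereas a variadic or cdecl function returns 0 and the caller pops.  */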
+
+/* Argument support functions. */
+
+/* Return true when register may be used to pass function parameters. */
+bool
+ix86_function_arg_regno_p (int regno)
+{
+ int i;
+ if (!TARGET_64BIT)
+ {
+ if (TARGET_MACHO)
+ return (regno < REGPARM_MAX
+ || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
+ else
+ return (regno < REGPARM_MAX
+ || (TARGET_MMX && MMX_REGNO_P (regno)
+ && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
+ || (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
+ }
+
+ if (TARGET_MACHO)
+ {
+ if (SSE_REGNO_P (regno) && TARGET_SSE)
+ return true;
+ }
+ else
+ {
+ if (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
+ return true;
+ }
+ /* RAX is used as hidden argument to va_arg functions. */
+ if (!regno)
+ return true;
+ for (i = 0; i < REGPARM_MAX; i++)
+ if (regno == x86_64_int_parameter_registers[i])
+ return true;
+ return false;
+}
+
+/* Return if we do not know how to pass TYPE solely in registers. */
+
+static bool
+ix86_must_pass_in_stack (enum machine_mode mode, tree type)
+{
+ if (must_pass_in_stack_var_size_or_pad (mode, type))
+ return true;
+
+ /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
+ The layout_type routine is crafty and tries to trick us into passing
+ currently unsupported vector types on the stack by using TImode. */
+ return (!TARGET_64BIT && mode == TImode
+ && type && TREE_CODE (type) != VECTOR_TYPE);
+}
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
+ tree fntype, /* tree ptr for function decl */
+ rtx libname, /* SYMBOL_REF of library name or 0 */
+ tree fndecl)
+{
+ static CUMULATIVE_ARGS zero_cum;
+ tree param, next_param;
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "\ninit_cumulative_args (");
+ if (fntype)
+ fprintf (stderr, "fntype code = %s, ret code = %s",
+ tree_code_name[(int) TREE_CODE (fntype)],
+ tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
+ else
+ fprintf (stderr, "no fntype");
+
+ if (libname)
+ fprintf (stderr, ", libname = %s", XSTR (libname, 0));
+ }
+
+ *cum = zero_cum;
+
+ /* Set up the number of registers to use for passing arguments. */
+ cum->nregs = ix86_regparm;
+ if (TARGET_SSE)
+ cum->sse_nregs = SSE_REGPARM_MAX;
+ if (TARGET_MMX)
+ cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->warn_sse = true;
+ cum->warn_mmx = true;
+ cum->maybe_vaarg = false;
+
+ /* Use ecx and edx registers if function has fastcall attribute,
+ else look for regparm information. */
+ if (fntype && !TARGET_64BIT)
+ {
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 2;
+ cum->fastcall = 1;
+ }
+ else
+ cum->nregs = ix86_function_regparm (fntype, fndecl);
+ }
+
+ /* Set up the number of SSE registers used for passing SFmode
+ and DFmode arguments. Warn for mismatching ABI. */
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
+
+ /* Determine if this function has variable arguments. This is
+ indicated by the last argument being 'void_type_node' if there
+ are no variable arguments. If there are variable arguments, then
+ we won't pass anything in registers in 32-bit mode. */
+
+ if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
+ {
+ for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
+ param != 0; param = next_param)
+ {
+ next_param = TREE_CHAIN (param);
+ if (next_param == 0 && TREE_VALUE (param) != void_type_node)
+ {
+ if (!TARGET_64BIT)
+ {
+ cum->nregs = 0;
+ cum->sse_nregs = 0;
+ cum->mmx_nregs = 0;
+ cum->warn_sse = 0;
+ cum->warn_mmx = 0;
+ cum->fastcall = 0;
+ cum->float_in_sse = 0;
+ }
+ cum->maybe_vaarg = true;
+ }
+ }
+ }
+ if ((!fntype && !libname)
+ || (fntype && !TYPE_ARG_TYPES (fntype)))
+ cum->maybe_vaarg = true;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, ", nregs=%d )\n", cum->nregs);
+
+ return;
+}
+
+/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
+ But in the case of vector types, it is some vector mode.
+
+ When we have only some of our vector isa extensions enabled, then there
+ are some modes for which vector_mode_supported_p is false. For these
+ modes, the generic vector support in gcc will choose some non-vector mode
+ in order to implement the type. By computing the natural mode, we'll
+ select the proper ABI location for the operand and not depend on whatever
+ the middle-end decides to do with these vector types. */
+
+static enum machine_mode
+type_natural_mode (tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+
+ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if ((size == 8 || size == 16)
+ /* ??? Generic code allows us to create width 1 vectors. Ignore. */
+ && TYPE_VECTOR_SUBPARTS (type) > 1)
+ {
+ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
+
+ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+ mode = MIN_MODE_VECTOR_FLOAT;
+ else
+ mode = MIN_MODE_VECTOR_INT;
+
+ /* Get the mode which has this inner mode and number of units. */
+ for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
+ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
+ && GET_MODE_INNER (mode) == innermode)
+ return mode;
+
+ gcc_unreachable ();
+ }
+ }
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ /* Pass V1DImode objects as DImode. This is for compatibility. */
+ if (TREE_CODE (type) == VECTOR_TYPE && mode == V1DImode && !TARGET_64BIT)
+ return DImode;
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ return mode;
+}
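+
+/* For illustration (hypothetical type): if the relevant vector extensions
+   are disabled, a type such as
+     typedef int v2si __attribute__ ((vector_size (8)));
+   is given a non-vector mode by the middle end; the loop above still
+   recovers V2SImode as its natural mode, so the ABI slot is chosen
+   independently of which ISA extensions happen to be enabled.  */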
+
+/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
+ this may not agree with the mode that the type system has chosen for the
+ register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
+ go ahead and use it. Otherwise we have to build a PARALLEL instead. */
+
+static rtx
+gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
+ unsigned int regno)
+{
+ rtx tmp;
+
+ if (orig_mode != BLKmode)
+ tmp = gen_rtx_REG (orig_mode, regno);
+ else
+ {
+ tmp = gen_rtx_REG (mode, regno);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
+ }
+
+ return tmp;
+}
+
+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
+ of this code is to classify each 8bytes of incoming argument by the register
+ class and assign registers accordingly. */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
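+
+/* For illustration: merging X86_64_INTEGERSI_CLASS with
+   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS (rule #4), while
+   merging any class with X86_64_MEMORY_CLASS yields X86_64_MEMORY_CLASS
+   (rule #3).  */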
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ BIT_OFFSET is used internally for handling records; it specifies the
+ offset in bits, modulo 256, to avoid overflow cases.
+
+ See the x86-64 PS ABI for details.
+*/
+
+static int
+classify_argument (enum machine_mode mode, tree type,
+ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
+{
+ HOST_WIDE_INT bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* Variable sized entities are always passed/returned in memory. */
+ if (bytes < 0)
+ return 0;
+
+ if (mode != VOIDmode
+ && targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ if (type && AGGREGATE_TYPE_P (type))
+ {
+ int i;
+ tree field;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* On x86-64 we pass structures larger than 16 bytes on the stack. */
+ if (bytes > 16)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+ /* Zero sized arrays or structures are NO_CLASS. We return 0 to
+ signal the memory class, so handle this as a special case. */
+ if (!words)
+ {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Classify each field of record and merge classes. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ /* For classes first merge in the field of the subclasses. */
+ if (TYPE_BINFO (type))
+ {
+ tree binfo, base_binfo;
+ int basenum;
+
+ for (binfo = TYPE_BINFO (type), basenum = 0;
+ BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
+ {
+ int num;
+ int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
+ tree type = BINFO_TYPE (base_binfo);
+
+ num = classify_argument (TYPE_MODE (type),
+ type, subclasses,
+ (offset + bit_offset) % 256);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos = (offset + (bit_offset % 64)) / 8 / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+ }
+ }
+ /* And now merge the fields of structure. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ /* Bitfields are always classified as integer. Handle them
+ early, since later code would consider them to be
+ misaligned integers. */
+ if (DECL_BIT_FIELD (field))
+ {
+ for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ i < ((int_bit_position (field) + (bit_offset % 64))
+ + tree_low_cst (DECL_SIZE (field), 0)
+ + 63) / 8 / 8; i++)
+ classes[i] =
+ merge_classes (X86_64_INTEGER_CLASS,
+ classes[i]);
+ }
+ else
+ {
+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
+ TREE_TYPE (field), subclasses,
+ (int_bit_position (field)
+ + bit_offset) % 256);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos =
+ (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+ }
+ }
+ }
+ break;
+
+ case ARRAY_TYPE:
+ /* Arrays are handled as small records. */
+ {
+ int num;
+ num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
+ TREE_TYPE (type), subclasses, bit_offset);
+ if (!num)
+ return 0;
+
+ /* The partial classes are now full classes. */
+ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
+ subclasses[0] = X86_64_SSE_CLASS;
+ if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
+ subclasses[0] = X86_64_INTEGER_CLASS;
+
+ for (i = 0; i < words; i++)
+ classes[i] = subclasses[i % num];
+
+ break;
+ }
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ /* Unions are similar to RECORD_TYPE but the offset is always 0. */
+
+ /* Unions are not derived. */
+ gcc_assert (!TYPE_BINFO (type)
+ || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
+ TREE_TYPE (field), subclasses,
+ bit_offset);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ classes[i] = merge_classes (subclasses[i], classes[i]);
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* The X86_64_SSEUP_CLASS should be always preceded by
+ X86_64_SSE_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
+ classes[i] = X86_64_SSE_CLASS;
+
+ /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
+ classes[i] = X86_64_SSE_CLASS;
+ }
+ return words;
+ }
+
+ /* Compute the alignment needed. We align all types to natural boundaries,
+ with the exception of XFmode, which is aligned to 64 bits. */
+ if (mode != VOIDmode && mode != BLKmode)
+ {
+ int mode_alignment = GET_MODE_BITSIZE (mode);
+
+ if (mode == XFmode)
+ mode_alignment = 128;
+ else if (mode == XCmode)
+ mode_alignment = 256;
+ if (COMPLEX_MODE_P (mode))
+ mode_alignment /= 2;
+ /* Misaligned fields are always returned in memory. */
+ if (bit_offset % mode_alignment)
+ return 0;
+ }
+
+ /* for V1xx modes, just use the base mode */
+ if (VECTOR_MODE_P (mode)
+ && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
+ mode = GET_MODE_INNER (mode);
+
+ /* Classification of atomic types. */
+ switch (mode)
+ {
+ case SDmode:
+ case DDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case TDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ case CSImode:
+ case CHImode:
+ case CQImode:
+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ else
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ case CDImode:
+ case TImode:
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ case CTImode:
+ return 0;
+ case SFmode:
+ if (!(bit_offset % 64))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case DFmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+ case XFmode:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case TFmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case SCmode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case DCmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+ case XCmode:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+ case TCmode:
+ /* This mode is larger than 16 bytes. */
+ return 0;
+ case V4SFmode:
+ case V4SImode:
+ case V16QImode:
+ case V8HImode:
+ case V2DFmode:
+ case V2DImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case V2SFmode:
+ case V2SImode:
+ case V4HImode:
+ case V8QImode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case BLKmode:
+ case VOIDmode:
+ return 0;
+ default:
+ gcc_assert (VECTOR_MODE_P (mode));
+
+ if (bytes > 16)
+ return 0;
+
+ gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
+
+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ else
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGER_CLASS;
+ return 1 + (bytes > 8);
+ }
+}
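+
+/* For illustration (hypothetical type): for
+     struct { double d; int i; };
+   classify_argument returns 2 with classes[0] == X86_64_SSEDF_CLASS and
+   classes[1] == X86_64_INTEGER_CLASS, so the struct is passed in one SSE
+   register and one general-purpose register.  */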
+
+/* Examine the argument and return set number of register required in each
+ class. Return 0 iff parameter should be passed in memory. */
+static int
+examine_argument (enum machine_mode mode, tree type, int in_return,
+ int *int_nregs, int *sse_nregs)
+{
+ enum x86_64_reg_class class[MAX_CLASSES];
+ int n = classify_argument (mode, type, class, 0);
+
+ *int_nregs = 0;
+ *sse_nregs = 0;
+ if (!n)
+ return 0;
+ for (n--; n >= 0; n--)
+ switch (class[n])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ (*int_nregs)++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ (*sse_nregs)++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ if (!in_return)
+ return 0;
+ break;
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return ? 2 : 0;
+ case X86_64_MEMORY_CLASS:
+ gcc_unreachable ();
+ }
+ return 1;
+}
+
+/* Construct container for the argument used by GCC interface. See
+ FUNCTION_ARG for the detailed description. */
+
+static rtx
+construct_container (enum machine_mode mode, enum machine_mode orig_mode,
+ tree type, int in_return, int nintregs, int nsseregs,
+ const int *intreg, int sse_regno)
+{
+ /* These static variables record which of the errors below have already been issued. */
+ static bool issued_sse_arg_error;
+ static bool issued_sse_ret_error;
+ static bool issued_x87_ret_error;
+
+ enum machine_mode tmpmode;
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ enum x86_64_reg_class class[MAX_CLASSES];
+ int n;
+ int i;
+ int nexps = 0;
+ int needed_sseregs, needed_intregs;
+ rtx exp[MAX_CLASSES];
+ rtx ret;
+
+ n = classify_argument (mode, type, class, 0);
+ if (TARGET_DEBUG_ARG)
+ {
+ if (!n)
+ fprintf (stderr, "Memory class\n");
+ else
+ {
+ fprintf (stderr, "Classes:");
+ for (i = 0; i < n; i++)
+ {
+ fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
+ }
+ fprintf (stderr, "\n");
+ }
+ }
+ if (!n)
+ return NULL;
+ if (!examine_argument (mode, type, in_return, &needed_intregs,
+ &needed_sseregs))
+ return NULL;
+ if (needed_intregs > nintregs || needed_sseregs > nsseregs)
+ return NULL;
+
+ /* We allowed the user to turn off SSE for kernel mode. Don't crash if
+ some less clueful developer tries to use floating-point anyway. */
+ if (needed_sseregs && !TARGET_SSE)
+ {
+ if (in_return)
+ {
+ if (!issued_sse_ret_error)
+ {
+ error ("SSE register return with SSE disabled");
+ issued_sse_ret_error = true;
+ }
+ }
+ else if (!issued_sse_arg_error)
+ {
+ error ("SSE register argument with SSE disabled");
+ issued_sse_arg_error = true;
+ }
+ return NULL;
+ }
+
+ /* Likewise, error if the ABI requires us to return values in the
+ x87 registers and the user specified -mno-80387. */
+ if (!TARGET_80387 && in_return)
+ for (i = 0; i < n; i++)
+ if (class[i] == X86_64_X87_CLASS
+ || class[i] == X86_64_X87UP_CLASS
+ || class[i] == X86_64_COMPLEX_X87_CLASS)
+ {
+ if (!issued_x87_ret_error)
+ {
+ error ("x87 register return with x87 disabled");
+ issued_x87_ret_error = true;
+ }
+ return NULL;
+ }
+
+ /* First construct simple cases. Avoid SCmode, since we want to use a
+ single register to pass this type. */
+ if (n == 1 && mode != SCmode)
+ switch (class[0])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ return gen_rtx_REG (mode, intreg[0]);
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
+ case X86_64_X87_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return gen_rtx_REG (mode, FIRST_STACK_REG);
+ case X86_64_NO_CLASS:
+ /* Zero sized array, struct or class. */
+ return NULL;
+ default:
+ gcc_unreachable ();
+ }
+ if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+ if (n == 2
+ && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
+ return gen_rtx_REG (XFmode, FIRST_STACK_REG);
+ if (n == 2 && class[0] == X86_64_INTEGER_CLASS
+ && class[1] == X86_64_INTEGER_CLASS
+ && (mode == CDImode || mode == TImode || mode == TFmode)
+ && intreg[0] + 1 == intreg[1])
+ return gen_rtx_REG (mode, intreg[0]);
+
+ /* Otherwise figure out the entries of the PARALLEL. */
+ for (i = 0; i < n; i++)
+ {
+ switch (class[i])
+ {
+ case X86_64_NO_CLASS:
+ break;
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ /* Merge TImodes on aligned occasions here too. */
+ if (i * 8 + 8 > bytes)
+ tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
+ else if (class[i] == X86_64_INTEGERSI_CLASS)
+ tmpmode = SImode;
+ else
+ tmpmode = DImode;
+ /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
+ if (tmpmode == BLKmode)
+ tmpmode = DImode;
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode, *intreg),
+ GEN_INT (i*8));
+ intreg++;
+ break;
+ case X86_64_SSESF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSEDF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSE_CLASS:
+ if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
+ tmpmode = TImode;
+ else
+ tmpmode = DImode;
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ if (tmpmode == TImode)
+ i++;
+ sse_regno++;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Empty aligned struct, union or class. */
+ if (nexps == 0)
+ return NULL;
+
+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
+ for (i = 0; i < nexps; i++)
+ XVECEXP (ret, 0, i) = exp [i];
+ return ret;
+}
+
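+/* For illustration: for a struct such as struct { double d; long l; },
+   construct_container builds roughly
+     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
+                (expr_list (reg:DI di)   (const_int 8))])
+   i.e. the first eightbyte in an SSE register and the second in an integer
+   register, each tagged with its byte offset within the aggregate.  */
+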
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+void
+function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int named)
+{
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (type)
+ mode = type_natural_mode (type);
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
+ "mode=%s, named=%d)\n\n",
+ words, cum->words, cum->nregs, cum->sse_nregs,
+ GET_MODE_NAME (mode), named);
+
+ if (TARGET_64BIT)
+ {
+ int int_nregs, sse_nregs;
+ if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
+ cum->words += words;
+ else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
+ {
+ cum->nregs -= int_nregs;
+ cum->sse_nregs -= sse_nregs;
+ cum->regno += int_nregs;
+ cum->sse_regno += sse_nregs;
+ }
+ else
+ cum->words += words;
+ }
+ else
+ {
+ switch (mode)
+ {
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ cum->words += words;
+ cum->nregs -= words;
+ cum->regno += words;
+
+ if (cum->nregs <= 0)
+ {
+ cum->nregs = 0;
+ cum->regno = 0;
+ }
+ break;
+
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+
+ case TImode:
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->sse_words += words;
+ cum->sse_nregs -= 1;
+ cum->sse_regno += 1;
+ if (cum->sse_nregs <= 0)
+ {
+ cum->sse_nregs = 0;
+ cum->sse_regno = 0;
+ }
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->mmx_words += words;
+ cum->mmx_nregs -= 1;
+ cum->mmx_regno += 1;
+ if (cum->mmx_nregs <= 0)
+ {
+ cum->mmx_nregs = 0;
+ cum->mmx_regno = 0;
+ }
+ }
+ break;
+ }
+ }
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+rtx
+function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
+ tree type, int named)
+{
+ enum machine_mode mode = orig_mode;
+ rtx ret = NULL_RTX;
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ static bool warnedsse, warnedmmx;
+
+ /* To simplify the code below, represent vector types with a vector mode
+ even if MMX/SSE are not active. */
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ mode = type_natural_mode (type);
+
+ /* Handle a hidden AL argument containing the number of registers for varargs
+ x86-64 functions. For the i386 ABI, just return constm1_rtx to avoid
+ any AL settings. */
+ if (mode == VOIDmode)
+ {
+ if (TARGET_64BIT)
+ return GEN_INT (cum->maybe_vaarg
+ ? (cum->sse_nregs < 0
+ ? SSE_REGPARM_MAX
+ : cum->sse_regno)
+ : -1);
+ else
+ return constm1_rtx;
+ }
+ if (TARGET_64BIT)
+ ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
+ cum->sse_nregs,
+ &x86_64_int_parameter_registers [cum->regno],
+ cum->sse_regno);
+ else
+ switch (mode)
+ {
+ /* For now, pass fp/complex values on the stack. */
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ if (words <= cum->nregs)
+ {
+ int regno = cum->regno;
+
+ /* Fastcall allocates the first two DWORD (SImode) or
+ smaller arguments to ECX and EDX. */
+ if (cum->fastcall)
+ {
+ if (mode == BLKmode || mode == DImode)
+ break;
+
+ /* ECX, not EAX, is the first allocated register. */
+ if (regno == 0)
+ regno = 2;
+ }
+ ret = gen_rtx_REG (mode, regno);
+ }
+ break;
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+ case TImode:
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_SSE && !warnedsse && cum->warn_sse)
+ {
+ warnedsse = true;
+ warning (0, "SSE vector argument without SSE enabled "
+ "changes the ABI");
+ }
+ if (cum->sse_nregs)
+ ret = gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector argument without MMX enabled "
+ "changes the ABI");
+ }
+ if (cum->mmx_nregs)
+ ret = gen_reg_or_parallel (mode, orig_mode,
+ cum->mmx_regno + FIRST_MMX_REG);
+ }
+ break;
+ }
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr,
+ "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
+ words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
+
+ if (ret)
+ print_simple_rtl (stderr, ret);
+ else
+ fprintf (stderr, ", stack");
+
+ fprintf (stderr, " )\n");
+ }
+
+ return ret;
+}
+
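+/* For illustration: with __attribute__((fastcall)) on ia32, the code above
+   places the first SImode-or-smaller argument in %ecx and the second in
+   %edx, while DImode and BLKmode arguments fall through to the stack.  */
+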
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type. */
+
+static bool
+ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (!TARGET_64BIT)
+ return 0;
+
+ if (type && int_size_in_bytes (type) == -1)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return true when TYPE should be 128-bit aligned for the 32-bit
+ argument-passing ABI. Only called if TARGET_SSE. */
+static bool
+contains_128bit_aligned_vector_p (tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+ if (SSE_REG_MODE_P (mode)
+ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
+ return true;
+ if (TYPE_ALIGN (type) < 128)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Walk the aggregates recursively. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ tree field;
+
+ if (TYPE_BINFO (type))
+ {
+ tree binfo, base_binfo;
+ int i;
+
+ for (binfo = TYPE_BINFO (type), i = 0;
+ BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
+ if (contains_128bit_aligned_vector_p
+ (BINFO_TYPE (base_binfo)))
+ return true;
+ }
+ /* And now merge the fields of the structure. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
+ return true;
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ /* Just for use if some language passes arrays by value. */
+ if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
+ return true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return false;
+}
+
+/* Gives the alignment boundary, in bits, of an argument with the
+ specified mode and type. */
+
+int
+ix86_function_arg_boundary (enum machine_mode mode, tree type)
+{
+ int align;
+ if (type)
+ align = TYPE_ALIGN (type);
+ else
+ align = GET_MODE_ALIGNMENT (mode);
+ /* APPLE LOCAL begin unbreak ppc64 abi 5103220 */
+ if (type && integer_zerop (TYPE_SIZE (type)))
+ align = PARM_BOUNDARY;
+ /* APPLE LOCAL end unbreak ppc64 abi 5103220 */
+ if (align < PARM_BOUNDARY)
+ align = PARM_BOUNDARY;
+ if (!TARGET_64BIT)
+ {
+ /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
+ make an exception for SSE modes, since these require 128-bit
+ alignment.
+
+ The handling here differs from field_alignment. ICC aligns MMX
+ arguments to 4-byte boundaries, while structure fields are aligned
+ to 8-byte boundaries. */
+ if (!TARGET_SSE)
+ align = PARM_BOUNDARY;
+ else if (!type)
+ {
+ if (!SSE_REG_MODE_P (mode))
+ align = PARM_BOUNDARY;
+ }
+ else
+ {
+ if (!contains_128bit_aligned_vector_p (type))
+ align = PARM_BOUNDARY;
+ }
+ }
+ if (align > 128)
+ align = 128;
+ return align;
+}
+
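+/* For illustration: on ia32 with SSE enabled, an __m128 argument (or a
+   struct that contains one) reports a 128-bit boundary here, while a plain
+   int stays at PARM_BOUNDARY (32 bits); on x86-64 the result is simply
+   capped at 128.  */
+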
+/* Return true if N is a possible register number of function value. */
+bool
+ix86_function_value_regno_p (int regno)
+{
+ if (TARGET_MACHO)
+ {
+ if (!TARGET_64BIT)
+ {
+ return ((regno) == 0
+ || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
+ || ((regno) == FIRST_SSE_REG && TARGET_SSE));
+ }
+ return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
+ || ((regno) == FIRST_SSE_REG && TARGET_SSE)
+ || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
+ }
+ else
+ {
+ if (regno == 0
+ || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
+ || (regno == FIRST_SSE_REG && TARGET_SSE))
+ return true;
+
+ if (!TARGET_64BIT
+ && (regno == FIRST_MMX_REG && TARGET_MMX))
+ return true;
+
+ return false;
+ }
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+rtx
+ix86_function_value (tree valtype, tree fntype_or_decl,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode natmode = type_natural_mode (valtype);
+
+ if (TARGET_64BIT)
+ {
+ rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
+ 1, REGPARM_MAX, SSE_REGPARM_MAX,
+ x86_64_int_return_registers, 0);
+ /* For zero-sized structures, construct_container returns NULL, but we
+ need to keep the rest of the compiler happy by returning a meaningful value. */
+ if (!ret)
+ ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
+ return ret;
+ }
+ else
+ {
+ tree fn = NULL_TREE, fntype;
+ if (fntype_or_decl
+ && DECL_P (fntype_or_decl))
+ fn = fntype_or_decl;
+ fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
+ return gen_rtx_REG (TYPE_MODE (valtype),
+ ix86_value_regno (natmode, fn, fntype));
+ }
+}
+
+/* APPLE LOCAL begin radar 4781080 */
+/* Return true iff we must generate a call to objcMsgSend for an
+ fp-returning method. */
+bool
+ix86_objc_fpreturn_msgcall (tree ret_type, bool no_long_double)
+{
+ if (no_long_double)
+ return TARGET_64BIT && SCALAR_FLOAT_TYPE_P (ret_type)
+ && TYPE_MODE (ret_type) != XFmode;
+ else
+ return SCALAR_FLOAT_TYPE_P (ret_type);
+}
+/* APPLE LOCAL end radar 4781080 */
+
+/* Return true iff type is returned in memory. */
+int
+ix86_return_in_memory (tree type)
+{
+ int needed_intregs, needed_sseregs, size;
+ enum machine_mode mode = type_natural_mode (type);
+
+ if (TARGET_64BIT)
+ return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+
+ if (mode == BLKmode)
+ return 1;
+
+ size = int_size_in_bytes (type);
+
+ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
+ return 0;
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ /* User-created vectors small enough to fit in EAX. */
+ if (size < 8)
+ return 0;
+
+ /* MMX/3dNow values are returned in MM0,
+ except when it doesn't exist. */
+ if (size == 8)
+ /* APPLE LOCAL begin radar 4875125. */
+ /* Undo the mainline patch which broke MACHO ABI compatibility. */
+ return (TARGET_MACHO) ? 1 : (TARGET_MMX ? 0 : 1);
+ /* APPLE LOCAL end radar 4875125. */
+
+ /* SSE values are returned in XMM0, except when it doesn't exist. */
+ if (size == 16)
+ return (TARGET_SSE ? 0 : 1);
+ }
+
+ if (mode == XFmode)
+ return 0;
+
+ if (mode == TDmode)
+ return 1;
+
+ if (size > 12)
+ return 1;
+ return 0;
+}
+
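+/* For illustration of the APPLE LOCAL choice above: on ia32 Darwin an
+   8-byte vector such as __m64 is always returned in memory, preserving the
+   Mach-O ABI, whereas a 16-byte vector like __m128 is returned in %xmm0
+   whenever SSE is enabled.  */
+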
+/* When returning SSE vector types, we have a choice of either
+ (1) being ABI incompatible with a -march switch, or
+ (2) generating an error.
+ Given no good solution, I think the safest thing is one warning.
+ The user won't be able to use -Werror, but....
+
+ Choose the STRUCT_VALUE_RTX hook because that's (at present) only
+ called in response to actually generating a caller or callee that
+ uses such a type. As opposed to RETURN_IN_MEMORY, which is called
+ via aggregate_value_p for general type probing from tree-ssa. */
+
+static rtx
+ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
+{
+ static bool warnedsse, warnedmmx;
+
+ if (type)
+ {
+ /* Look at the return type of the function, not the function type. */
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
+
+ if (!TARGET_SSE && !warnedsse)
+ {
+ if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ {
+ warnedsse = true;
+ warning (0, "SSE vector return without SSE enabled "
+ "changes the ABI");
+ }
+ }
+
+ if (!TARGET_MMX && !warnedmmx)
+ {
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector return without MMX enabled "
+ "changes the ABI");
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+rtx
+ix86_libcall_value (enum machine_mode mode)
+{
+ if (TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case SFmode:
+ case SCmode:
+ case DFmode:
+ case DCmode:
+ case TFmode:
+ case SDmode:
+ case DDmode:
+ case TDmode:
+ return gen_rtx_REG (mode, FIRST_SSE_REG);
+ case XFmode:
+ case XCmode:
+ return gen_rtx_REG (mode, FIRST_FLOAT_REG);
+ case TCmode:
+ return NULL;
+ default:
+ return gen_rtx_REG (mode, 0);
+ }
+ }
+ else
+ return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
+}
+
+/* Given a mode, return the register to use for a return value. */
+
+static int
+ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
+ we normally prevent this case when MMX is not available. However,
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ return TARGET_MMX ? FIRST_MMX_REG : 0;
+
+ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
+ we prevent this case when SSE is not available. However, some ABIs
+ may require the result to be returned like integer TImode. */
+ if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ return TARGET_SSE ? FIRST_SSE_REG : 0;
+
+ /* Decimal floating point values can go in %eax, unlike other float modes. */
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return 0;
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (SSE_FLOAT_MODE_P(mode)
+ && fntype && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (fntype)))
+ return FIRST_SSE_REG;
+ /* APPLE LOCAL end regparmandstackparm */
+
+ /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
+ if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
+ return 0;
+
+ /* Floating point return values in %st(0), except for local functions when
+ SSE math is enabled or for functions with sseregparm attribute. */
+ if ((func || fntype)
+ && (mode == SFmode || mode == DFmode))
+ {
+ int sse_level = ix86_function_sseregparm (fntype, func);
+ if ((sse_level >= 1 && mode == SFmode)
+ || (sse_level == 2 && mode == DFmode))
+ return FIRST_SSE_REG;
+ }
+
+ return FIRST_FLOAT_REG;
+}
+
+/* Create the va_list data type. */
+
+static tree
+ix86_build_builtin_va_list (void)
+{
+ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
+
+ /* For i386 we use plain pointer to argument area. */
+ if (!TARGET_64BIT)
+ return build_pointer_type (char_type_node);
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
+ unsigned_type_node);
+ f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
+ unsigned_type_node);
+ f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
+ ptr_type_node);
+ f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
+ ptr_type_node);
+
+ va_list_gpr_counter_field = f_gpr;
+ va_list_fpr_counter_field = f_fpr;
+
+ DECL_FIELD_CONTEXT (f_gpr) = record;
+ DECL_FIELD_CONTEXT (f_fpr) = record;
+ DECL_FIELD_CONTEXT (f_ovf) = record;
+ DECL_FIELD_CONTEXT (f_sav) = record;
+
+ TREE_CHAIN (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_gpr;
+ TREE_CHAIN (f_gpr) = f_fpr;
+ TREE_CHAIN (f_fpr) = f_ovf;
+ TREE_CHAIN (f_ovf) = f_sav;
+
+ layout_type (record);
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
+
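+/* For illustration: on x86-64 the record built above matches the ABI's
+   __va_list_tag, roughly
+     typedef struct {
+       unsigned int gp_offset;
+       unsigned int fp_offset;
+       void *overflow_arg_area;
+       void *reg_save_area;
+     } __va_list_tag;
+   and va_list is a one-element array of it, so it decays to a pointer when
+   passed to functions.  */
+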
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS next_cum;
+ rtx save_area = NULL_RTX, mem;
+ rtx label;
+ rtx label_ref;
+ rtx tmp_reg;
+ rtx nsse_reg;
+ int set;
+ tree fntype;
+ int stdarg_p;
+ int i;
+
+ if (!TARGET_64BIT)
+ return;
+
+ if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
+ return;
+
+ /* Indicate that we need to allocate space on the stack for the varargs save area. */
+ ix86_save_varrargs_registers = 1;
+
+ cfun->stack_alignment_needed = 128;
+
+ fntype = TREE_TYPE (current_function_decl);
+ stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+ != void_type_node));
+
+ /* For varargs, we do not want to skip the dummy va_dcl argument.
+ For stdargs, we do want to skip the last named argument. */
+ next_cum = *cum;
+ if (stdarg_p)
+ function_arg_advance (&next_cum, mode, type, 1);
+
+ if (!no_rtl)
+ save_area = frame_pointer_rtx;
+
+ set = get_varargs_alias_set ();
+
+ for (i = next_cum.regno;
+ i < ix86_regparm
+ && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ i++)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (save_area, i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (Pmode,
+ x86_64_int_parameter_registers[i]));
+ }
+
+ if (next_cum.sse_nregs && cfun->va_list_fpr_size)
+ {
+ /* Now emit code to save the SSE registers. The AX parameter contains the
+ number of SSE parameter registers used to call this function. We use the
+ sse_prologue_save insn template, which produces a computed jump across
+ the SSE saves. We need some preparation work to get this working. */
+
+ label = gen_label_rtx ();
+ label_ref = gen_rtx_LABEL_REF (Pmode, label);
+
+ /* Compute the address to jump to:
+ label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes long). */
+ tmp_reg = gen_reg_rtx (Pmode);
+ nsse_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_MULT (Pmode, nsse_reg,
+ GEN_INT (4))));
+ if (next_cum.sse_regno)
+ emit_move_insn
+ (nsse_reg,
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ label_ref,
+ GEN_INT (next_cum.sse_regno * 4))));
+ else
+ emit_move_insn (nsse_reg, label_ref);
+ emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
+
+ /* Compute the address of the memory block we save into. We always use a
+ pointer pointing 127 bytes after the first byte to store, so that every
+ displacement fits in a signed byte and each save instruction stays within
+ 4 bytes. */
+ tmp_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ plus_constant (save_area,
+ 8 * REGPARM_MAX + 127)));
+ mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, BITS_PER_WORD);
+
+ /* And finally do the dirty job! */
+ emit_insn (gen_sse_prologue_save (mem, nsse_reg,
+ GEN_INT (next_cum.sse_regno), label));
+ }
+
+}
+
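+/* For illustration: the register save area laid out above starts with
+   REGPARM_MAX (6) eightbytes for %rdi, %rsi, %rdx, %rcx, %r8 and %r9,
+   followed by 16-byte slots for the SSE registers.  The caller's %al value
+   tells the computed jump how many of the vector-register saves actually
+   need to execute.  */
+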
+/* Implement va_start. */
+
+void
+ix86_va_start (tree valist, rtx nextarg)
+{
+ HOST_WIDE_INT words, n_gpr, n_fpr;
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ tree type;
+
+ /* Only 64bit target needs something special. */
+ if (!TARGET_64BIT)
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ /* Count number of gp and fp argument registers used. */
+ words = current_function_args_info.words;
+ n_gpr = current_function_args_info.regno;
+ n_fpr = current_function_args_info.sse_regno;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
+ (int) words, (int) n_gpr, (int) n_fpr);
+
+ if (cfun->va_list_gpr_size)
+ {
+ type = TREE_TYPE (gpr);
+ t = build2 (MODIFY_EXPR, type, gpr,
+ build_int_cst (type, n_gpr * 8));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (cfun->va_list_fpr_size)
+ {
+ type = TREE_TYPE (fpr);
+ t = build2 (MODIFY_EXPR, type, fpr,
+ build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the overflow area. */
+ type = TREE_TYPE (ovf);
+ t = make_tree (type, virtual_incoming_args_rtx);
+ if (words != 0)
+ t = build2 (PLUS_EXPR, type, t,
+ build_int_cst (type, words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, type, ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ {
+ /* Find the register save area.
+ The function prologue saves it right above the stack frame. */
+ type = TREE_TYPE (sav);
+ t = make_tree (type, frame_pointer_rtx);
+ t = build2 (MODIFY_EXPR, type, sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
+
+/* Implement va_arg. */
+
+tree
+ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
+{
+ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ int size, rsize;
+ tree lab_false, lab_over = NULL_TREE;
+ tree addr, t2;
+ rtx container;
+ int indirect_p = 0;
+ tree ptrtype;
+ enum machine_mode nat_mode;
+
+ /* Only 64bit target needs something special. */
+ if (!TARGET_64BIT)
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build_va_arg_indirect_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect_p)
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ nat_mode = type_natural_mode (type);
+ container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
+ REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
+
+ /* Pull the value out of the saved registers. */
+
+ addr = create_tmp_var (ptr_type_node, "addr");
+ DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
+
+ if (container)
+ {
+ int needed_intregs, needed_sseregs;
+ bool need_temp;
+ tree int_addr, sse_addr;
+
+ lab_false = create_artificial_label ();
+ lab_over = create_artificial_label ();
+
+ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
+
+ need_temp = (!REG_P (container)
+ && ((needed_intregs && TYPE_ALIGN (type) > 64)
+ || TYPE_ALIGN (type) > 128));
+
+ /* In case we are passing a structure, verify that it is a consecutive block
+ in the register save area. If not, we need to do moves. */
+ if (!need_temp && !REG_P (container))
+ {
+ /* Verify that all registers are strictly consecutive */
+ if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 16)
+ need_temp = 1;
+ }
+ }
+ else
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 8)
+ need_temp = 1;
+ }
+ }
+ }
+ if (!need_temp)
+ {
+ int_addr = addr;
+ sse_addr = addr;
+ }
+ else
+ {
+ int_addr = create_tmp_var (ptr_type_node, "int_addr");
+ DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
+ sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
+ DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
+ }
+
+ /* First ensure that we fit completely in registers. */
+ if (needed_intregs)
+ {
+ t = build_int_cst (TREE_TYPE (gpr),
+ (REGPARM_MAX - needed_intregs + 1) * 8);
+ t = build2 (GE_EXPR, boolean_type_node, gpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ t = build_int_cst (TREE_TYPE (fpr),
+ (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
+ + REGPARM_MAX * 8);
+ t = build2 (GE_EXPR, boolean_type_node, fpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+
+ /* Compute index to start of area used for integer regs. */
+ if (needed_intregs)
+ {
+ /* int_addr = gpr + sav; */
+ t = fold_convert (ptr_type_node, gpr);
+ t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
+ t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ /* sse_addr = fpr + sav; */
+ t = fold_convert (ptr_type_node, fpr);
+ t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
+ t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
+ gimplify_and_add (t, pre_p);
+ }
+ if (need_temp)
+ {
+ int i;
+ tree temp = create_tmp_var (type, "va_arg_tmp");
+
+ /* addr = &temp; */
+ t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
+ t = build2 (MODIFY_EXPR, void_type_node, addr, t);
+ gimplify_and_add (t, pre_p);
+
+ for (i = 0; i < XVECLEN (container, 0); i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ rtx reg = XEXP (slot, 0);
+ enum machine_mode mode = GET_MODE (reg);
+ tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
+ tree addr_type = build_pointer_type (piece_type);
+ tree src_addr, src;
+ int src_offset;
+ tree dest_addr, dest;
+
+ if (SSE_REGNO_P (REGNO (reg)))
+ {
+ src_addr = sse_addr;
+ src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
+ }
+ else
+ {
+ src_addr = int_addr;
+ src_offset = REGNO (reg) * 8;
+ }
+ src_addr = fold_convert (addr_type, src_addr);
+ src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
+ size_int (src_offset)));
+ src = build_va_arg_indirect_ref (src_addr);
+
+ dest_addr = fold_convert (addr_type, addr);
+ dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
+ size_int (INTVAL (XEXP (slot, 1)))));
+ dest = build_va_arg_indirect_ref (dest_addr);
+
+ t = build2 (MODIFY_EXPR, void_type_node, dest, src);
+ gimplify_and_add (t, pre_p);
+ }
+ }
+
+ if (needed_intregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
+ build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
+ gimplify_and_add (t, pre_p);
+ }
+
+ t = build1 (GOTO_EXPR, void_type_node, lab_over);
+ gimplify_and_add (t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_false);
+ append_to_statement_list (t, pre_p);
+ }
+
+ /* ... otherwise out of the overflow area. */
+
+ /* Care for on-stack alignment if needed. */
+ if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
+ || integer_zerop (TYPE_SIZE (type)))
+ t = ovf;
+ else
+ {
+ HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
+ t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
+ build_int_cst (TREE_TYPE (ovf), align - 1));
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ build_int_cst (TREE_TYPE (t), -align));
+ }
+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+ t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
+ gimplify_and_add (t2, pre_p);
+
+ t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
+ build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ gimplify_and_add (t, pre_p);
+
+ if (container)
+ {
+ t = build1 (LABEL_EXPR, void_type_node, lab_over);
+ append_to_statement_list (t, pre_p);
+ }
+
+ ptrtype = build_pointer_type (type);
+ addr = fold_convert (ptrtype, addr);
+
+ if (indirect_p)
+ addr = build_va_arg_indirect_ref (addr);
+ return build_va_arg_indirect_ref (addr);
+}
+
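+/* For illustration: for va_arg (ap, int) the code above gimplifies to
+   roughly
+     if (ap->gp_offset >= 48) goto overflow;
+     addr = ap->reg_save_area + ap->gp_offset;
+     ap->gp_offset += 8;
+   with the overflow path pulling the value from overflow_arg_area instead,
+   matching the psABI's va_arg algorithm.  */
+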
+/* Return nonzero if OPNUM's MEM should be matched
+ in movabs* patterns. */
+
+int
+ix86_check_movabs (rtx insn, int opnum)
+{
+ rtx set, mem;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ mem = XEXP (set, opnum);
+ while (GET_CODE (mem) == SUBREG)
+ mem = SUBREG_REG (mem);
+ gcc_assert (GET_CODE (mem) == MEM);
+ return (volatile_ok || !MEM_VOLATILE_P (mem));
+}
+
+/* Initialize the table of extra 80387 mathematical constants. */
+
+static void
+init_ext_80387_constants (void)
+{
+ static const char * cst[5] =
+ {
+ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
+ "0.6931471805599453094286904741849753009", /* 1: fldln2 */
+ "1.4426950408889634073876517827983434472", /* 2: fldl2e */
+ "3.3219280948873623478083405569094566090", /* 3: fldl2t */
+ "3.1415926535897932385128089594061862044", /* 4: fldpi */
+ };
+ int i;
+
+ for (i = 0; i < 5; i++)
+ {
+ real_from_string (&ext_80387_constants_table[i], cst[i]);
+ /* Ensure each constant is rounded to XFmode precision. */
+ real_convert (&ext_80387_constants_table[i],
+ XFmode, &ext_80387_constants_table[i]);
+ }
+
+ ext_80387_constants_init = 1;
+}
+
+/* Return true if the constant is something that can be loaded with
+ a special instruction. */
+
+int
+standard_80387_constant_p (rtx x)
+{
+ if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
+ return -1;
+
+ if (x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (x == CONST1_RTX (GET_MODE (x)))
+ return 2;
+
+ /* For XFmode constants, try to find a special 80387 instruction when
+ optimizing for size or on those CPUs that benefit from them. */
+ if (GET_MODE (x) == XFmode
+ && (optimize_size || x86_ext_80387_constants & TUNEMASK))
+ {
+ REAL_VALUE_TYPE r;
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ for (i = 0; i < 5; i++)
+ if (real_identical (&r, &ext_80387_constants_table[i]))
+ return i + 3;
+ }
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_80387_constant_opcode (rtx x)
+{
+ switch (standard_80387_constant_p (x))
+ {
+ case 1:
+ return "fldz";
+ case 2:
+ return "fld1";
+ case 3:
+ return "fldlg2";
+ case 4:
+ return "fldln2";
+ case 5:
+ return "fldl2e";
+ case 6:
+ return "fldl2t";
+ case 7:
+ return "fldpi";
+ default:
+ gcc_unreachable ();
+ }
+}
+
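+/* For illustration: an XFmode zero yields 1 ("fldz") and an XFmode one
+   yields 2 ("fld1") above, while a constant equal to pi matches entry 4 of
+   the table and so returns 7, which standard_80387_constant_opcode maps to
+   "fldpi".  */
+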
+/* Return the CONST_DOUBLE representing the 80387 constant that is
+ loaded by the specified special instruction. The argument IDX
+ matches the return value from standard_80387_constant_p. */
+
+rtx
+standard_80387_constant_rtx (int idx)
+{
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ switch (idx)
+ {
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ i = idx - 3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
+ XFmode);
+}
+
+/* Return 1 if mode is a valid mode for sse. */
+static int
+standard_sse_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return 1 if X is an FP constant we can load into an SSE register
+ without using memory. */
+int
+standard_sse_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (vector_all_ones_operand (x, mode)
+ && standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -1;
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else if (get_attr_mode (insn) == MODE_V2DF)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 2:
+ return "pcmpeqd\t%0, %0";
+ }
+ gcc_unreachable ();
+}
+
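+/* For illustration: an all-zero vector constant scores 1 above and is
+   materialized by xor'ing the destination register with itself (xorps,
+   xorpd or pxor, depending on the insn's mode attribute), while an all-ones
+   vector scores 2 under SSE2 and uses pcmpeqd on the destination.  */
+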
+/* Returns 1 if OP contains a symbol reference */
+
+int
+symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return 1 if it is appropriate to emit `ret' instructions in the
+ body of a function. Do this only if the epilogue is simple, needing a
+ couple of insns. Prior to reloading, we can't tell how many registers
+ must be saved, so return 0 then. Return 0 if there is no frame
+ marker to de-allocate. */
+
+int
+ix86_can_use_return_insn_p (void)
+{
+ struct ix86_frame frame;
+
+ if (! reload_completed || frame_pointer_needed)
+ return 0;
+
+ /* Don't allow more than 32k of popped args, since that's all we can do
+ with one instruction. */
+ if (current_function_pops_args
+ && current_function_args_size >= 32768)
+ return 0;
+
+ ix86_compute_frame_layout (&frame);
+ return frame.to_allocate == 0 && frame.nregs == 0;
+}
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may
+ be accessed via the stack pointer) in functions that seem suitable. */
+
+int
+ix86_frame_pointer_required (void)
+{
+ /* If we accessed previous frames, then the generated code expects
+ to be able to access the saved ebp value in our frame. */
+ if (cfun->machine->accesses_prev_frame)
+ return 1;
+
+ /* Several x86 OSes need a frame pointer for other reasons,
+ usually pertaining to setjmp. */
+ if (SUBTARGET_FRAME_POINTER_REQUIRED)
+ return 1;
+
+ /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
+ the frame pointer by default. Turn it back on now if we've not
+ got a leaf function. */
+ if (TARGET_OMIT_LEAF_FRAME_POINTER
+ && (!current_function_is_leaf
+ || ix86_current_function_calls_tls_descriptor))
+ return 1;
+
+ if (current_function_profile)
+ return 1;
+
+ return 0;
+}
+
+/* Record that the current function accesses previous call frames. */
+
+void
+ix86_setup_frame_addresses (void)
+{
+ cfun->machine->accesses_prev_frame = 1;
+}
+
+#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# define USE_HIDDEN_LINKONCE 1
+#else
+# define USE_HIDDEN_LINKONCE 0
+#endif
+
+/* APPLE LOCAL 5695218 */
+static GTY(()) int pic_labels_used;
+
+/* Fills in the label name that should be used for a pc thunk for
+ the given register. */
+
+static void
+get_pc_thunk_name (char name[32], unsigned int regno)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* APPLE LOCAL deep branch prediction pic-base. */
+ if (USE_HIDDEN_LINKONCE || TARGET_MACHO)
+ sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
+ else
+ ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
+}
+
+
+/* This function generates, for -fpic, the pc thunks that load a register with
+ the return address of the caller and then return. */
+
+void
+ix86_file_end (void)
+{
+ rtx xops[2];
+ int regno;
+
+ for (regno = 0; regno < 8; ++regno)
+ {
+ char name[32];
+
+ if (! ((pic_labels_used >> regno) & 1))
+ continue;
+
+ get_pc_thunk_name (name, regno);
+
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ switch_to_section (darwin_sections[text_coal_section]);
+ fputs ("\t.weak_definition\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n\t.private_extern\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+ else
+#endif
+ if (USE_HIDDEN_LINKONCE)
+ {
+ tree decl;
+
+ decl = build_decl (FUNCTION_DECL, get_identifier (name),
+ error_mark_node);
+ TREE_PUBLIC (decl) = 1;
+ TREE_STATIC (decl) = 1;
+ DECL_ONE_ONLY (decl) = 1;
+
+ (*targetm.asm_out.unique_section) (decl, 0);
+ switch_to_section (get_named_section (decl, NULL, 0));
+
+ (*targetm.asm_out.globalize_label) (asm_out_file, name);
+ fputs ("\t.hidden\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputc ('\n', asm_out_file);
+ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
+ }
+ /* APPLE LOCAL begin deep branch prediction pic-base */
+#if TARGET_MACHO
+ else if (TARGET_MACHO)
+ {
+ switch_to_section (darwin_sections[text_coal_section]);
+ fputs (".weak_definition\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n.private_extern\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+#endif
+ /* APPLE LOCAL end deep branch prediction pic-base */
+ else
+ {
+ switch_to_section (text_section);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+
+ xops[0] = gen_rtx_REG (SImode, regno);
+ xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
+ output_asm_insn ("ret", xops);
+ }
+
+ if (NEED_INDICATE_EXEC_STACK)
+ file_end_indicate_exec_stack ();
+}
+
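+/* For illustration: for %ebx the thunk emitted above looks roughly like
+     __i686.get_pc_thunk.bx:
+             movl    (%esp), %ebx
+             ret
+   so a "call" to it leaves the address of the instruction after the call
+   in the chosen register, which is then used as the PIC base.  */
+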
+/* Emit code for the SET_GOT patterns. */
+
+const char *
+output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
+{
+ rtx xops[3];
+
+ xops[0] = dest;
+ xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+
+ if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+ {
+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
+
+ if (!flag_pic)
+ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
+ else
+ /* APPLE LOCAL begin dwarf call/pop 5221468 */
+ {
+ output_asm_insn ("call\t%a2", xops);
+
+ /* If necessary, report the effect that the instruction has on
+ the unwind info. */
+#if defined (DWARF2_UNWIND_INFO)
+ if (flag_asynchronous_unwind_tables
+#if !defined (HAVE_prologue)
+ && !ACCUMULATE_OUTGOING_ARGS
+#endif
+ && dwarf2out_do_frame ())
+ {
+ rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (-4)));
+ insn = make_insn_raw (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ }
+#endif
+ }
+ /* APPLE LOCAL end dwarf call/pop 5221468 */
+
+#if TARGET_MACHO
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+#endif
+
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
+
+ if (flag_pic)
+ /* APPLE LOCAL begin dwarf call/pop 5221468 */
+ {
+ output_asm_insn ("pop{l}\t%0", xops);
+
+ /* If necessary, report the effect that the instruction has on
+ the unwind info. We've already done this for delay slots
+ and call instructions. */
+#if defined (DWARF2_UNWIND_INFO)
+ if (flag_asynchronous_unwind_tables
+#if !defined (HAVE_prologue)
+ && !ACCUMULATE_OUTGOING_ARGS
+#endif
+ && dwarf2out_do_frame ())
+ {
+ rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (4)));
+ insn = make_insn_raw (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ }
+#endif
+ }
+ /* APPLE LOCAL end dwarf call/pop 5221468 */
+ }
+ else
+ {
+ char name[32];
+ get_pc_thunk_name (name, REGNO (dest));
+ pic_labels_used |= 1 << REGNO (dest);
+
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ xops[2] = gen_rtx_MEM (QImode, xops[2]);
+ output_asm_insn ("call\t%X2", xops);
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+#if TARGET_MACHO
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+ else
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+#endif
+ }
+
+ if (TARGET_MACHO)
+ return "";
+
+ if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
+ else
+ output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+
+ return "";
+}
+
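+/* For illustration: without deep branch prediction the SET_GOT sequence
+   above expands to roughly
+     call    .L2
+   .L2:
+     popl    %ebx
+     addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
+   whereas on Darwin the trailing add is omitted and the Mach-O pic-base
+   label ("Lnn$pb") is placed right after the call, so the popped value is
+   the pic base; with deep branch prediction the matching
+   __i686.get_pc_thunk thunk is called instead.  */
+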
+/* Generate a "push" pattern for input ARG. */
+
+static rtx
+gen_push (rtx arg)
+{
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ arg);
+}
+
+/* Return the number of an unused call-clobbered register if one is available
+ for the entire function, or INVALID_REGNUM otherwise. */
+
+static unsigned int
+ix86_select_alt_pic_regnum (void)
+{
+ if (current_function_is_leaf && !current_function_profile
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ int i;
+ for (i = 2; i >= 0; --i)
+ if (!regs_ever_live[i])
+ return i;
+ }
+
+ return INVALID_REGNUM;
+}
+
+/* APPLE LOCAL begin 5695218 */
+/* Reload may introduce references to the PIC base register
+ that do not directly reference pic_offset_table_rtx.
+ In the rare event we choose an alternate PIC register,
+ walk all the insns and rewrite every reference. */
+/* Run through the insns, changing references to the original
+ PIC_OFFSET_TABLE_REGNUM to our new one. */
+static void
+ix86_globally_replace_pic_reg (unsigned int new_pic_regno)
+{
+ rtx insn;
+ const int nregs = PIC_OFFSET_TABLE_REGNUM + 1;
+ rtx reg_map[FIRST_PSEUDO_REGISTER];
+ memset (reg_map, 0, nregs * sizeof (rtx));
+ pic_offset_table_rtx = gen_rtx_REG (SImode, new_pic_regno);
+ reg_map[REAL_PIC_OFFSET_TABLE_REGNUM] = pic_offset_table_rtx;
+
+ push_topmost_sequence ();
+ for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
+ {
+ replace_regs (PATTERN (insn), reg_map, nregs, 1);
+ replace_regs (REG_NOTES (insn), reg_map, nregs, 1);
+ }
+#if defined (TARGET_TOC)
+ else if (GET_CODE (insn) == CALL_INSN)
+ {
+ if ( !SIBLING_CALL_P (insn))
+ abort ();
+ }
+#endif
+ }
+ pop_topmost_sequence ();
+
+ regs_ever_live[new_pic_regno] = 1;
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 0;
+#if defined (TARGET_TOC)
+ cfun->machine->substitute_pic_base_reg = new_pic_regno;
+#endif
+}
+/* APPLE LOCAL end 5695218 */
+
+/* Return 1 if we need to save REGNO. */
+static int
+ix86_save_reg (unsigned int regno, int maybe_eh_return)
+{
+ /* APPLE LOCAL begin CW asm blocks */
+ /* For an asm function, we don't save any registers; instead, the
+ user is responsible for saving them. */
+ if (cfun->iasm_asm_function)
+ return 0;
+ /* APPLE LOCAL end CW asm blocks */
+
+ if (pic_offset_table_rtx
+ && regno == REAL_PIC_OFFSET_TABLE_REGNUM
+ /* APPLE LOCAL begin 5695218 */
+ && (current_function_uses_pic_offset_table
+ || current_function_profile
+ || current_function_calls_eh_return
+ || current_function_uses_const_pool))
+ /* APPLE LOCAL end 5695218 */
+ {
+ if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
+ return 0;
+ return 1;
+ }
+
+ if (current_function_calls_eh_return && maybe_eh_return)
+ {
+ unsigned i;
+ for (i = 0; ; i++)
+ {
+ unsigned test = EH_RETURN_DATA_REGNO (i);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == regno)
+ return 1;
+ }
+ }
+
+ if (cfun->machine->force_align_arg_pointer
+ && regno == REGNO (cfun->machine->force_align_arg_pointer))
+ return 1;
+
+ /* APPLE LOCAL begin 5695218 */
+ /* In order to get accurate usage info for the PIC register, we've
+ been forced to break and un-break the call_used_regs and
+ fixed_regs vectors. Ignore them when considering the PIC
+ register. */
+ if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
+ && regs_ever_live[regno])
+ return 1;
+ /* APPLE LOCAL end 5695218 */
+
+ return (regs_ever_live[regno]
+ && !call_used_regs[regno]
+ && !fixed_regs[regno]
+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
+}
+
+/* Return number of registers to be saved on the stack. */
+
+static int
+ix86_nsaved_regs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
+ if (ix86_save_reg (regno, true))
+ nregs++;
+ return nregs;
+}
+
+/* Return the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+HOST_WIDE_INT
+ix86_initial_elimination_offset (int from, int to)
+{
+ struct ix86_frame frame;
+ ix86_compute_frame_layout (&frame);
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset;
+ else if (from == FRAME_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
+ else
+ {
+ gcc_assert (to == STACK_POINTER_REGNUM);
+
+ if (from == ARG_POINTER_REGNUM)
+ return frame.stack_pointer_offset;
+
+ gcc_assert (from == FRAME_POINTER_REGNUM);
+ return frame.stack_pointer_offset - frame.frame_pointer_offset;
+ }
+}
+
+/* Fill the ix86_frame structure with information about the frame of the
+ function currently being compiled. */
+
+static void
+ix86_compute_frame_layout (struct ix86_frame *frame)
+{
+ HOST_WIDE_INT total_size;
+ unsigned int stack_alignment_needed;
+ HOST_WIDE_INT offset;
+ unsigned int preferred_alignment;
+ HOST_WIDE_INT size = get_frame_size ();
+
+ frame->nregs = ix86_nsaved_regs ();
+ total_size = size;
+
+ stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
+ preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
+
+ /* During the reload iteration the number of registers saved can change.
+ Recompute the value as needed. Do not recompute when the number of registers
+ didn't change, as reload makes multiple calls to this function and does not
+ expect the decision to change within a single iteration. */
+ if (!optimize_size
+ && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
+ {
+ int count = frame->nregs;
+
+ cfun->machine->use_fast_prologue_epilogue_nregs = count;
+ /* The fast prologue uses move instead of push to save registers. This
+ is significantly longer, but also executes faster, as modern hardware
+ can execute the moves in parallel but can't do that for push/pop.
+
+ Be careful about choosing which prologue to emit: when the function takes
+ many instructions to execute, we may use the slow version, as we also do
+ when the function is known to be outside a hot spot (known only with
+ profile feedback). Weight the size of the function by the number of
+ registers to save, as it is cheap to use one or two push instructions but
+ very slow to use many of them. */
+ if (count)
+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
+ if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ || (flag_branch_probabilities
+ && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ cfun->machine->use_fast_prologue_epilogue = false;
+ else
+ cfun->machine->use_fast_prologue_epilogue
+ = !expensive_function_p (count);
+ }
+ if (TARGET_PROLOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue)
+ frame->save_regs_using_mov = true;
+ else
+ frame->save_regs_using_mov = false;
+
+
+ /* Skip return address and saved base pointer. */
+ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
+
+ frame->hard_frame_pointer_offset = offset;
+
+ /* Do some sanity checking of stack_alignment_needed and
+ preferred_alignment, since the i386 port is the only one using those
+ features, which may break easily. */
+
+ gcc_assert (!size || stack_alignment_needed);
+ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (stack_alignment_needed
+ <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+
+ if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
+ stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
+
+ /* Register save area */
+ offset += frame->nregs * UNITS_PER_WORD;
+
+ /* Va-arg area */
+ if (ix86_save_varrargs_registers)
+ {
+ offset += X86_64_VARARGS_SIZE;
+ frame->va_arg_size = X86_64_VARARGS_SIZE;
+ }
+ else
+ frame->va_arg_size = 0;
+
+ /* Align start of frame for local function. */
+ frame->padding1 = ((offset + stack_alignment_needed - 1)
+ & -stack_alignment_needed) - offset;
+
+ offset += frame->padding1;
+
+ /* Frame pointer points here. */
+ frame->frame_pointer_offset = offset;
+
+ offset += size;
+
+ /* Add the outgoing arguments area. It can be skipped if we eliminated
+ all the function calls as dead code.
+ Skipping is, however, impossible when the function calls alloca: the
+ alloca expander assumes that the last current_function_outgoing_args_size
+ bytes of the stack frame are unused. */
+ if (ACCUMULATE_OUTGOING_ARGS
+ && (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor))
+ {
+ offset += current_function_outgoing_args_size;
+ frame->outgoing_arguments_size = current_function_outgoing_args_size;
+ }
+ else
+ frame->outgoing_arguments_size = 0;
+
+ /* Align stack boundary. Only needed if we're calling another function
+ or using alloca. */
+ if (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
+ frame->padding2 = ((offset + preferred_alignment - 1)
+ & -preferred_alignment) - offset;
+ else
+ frame->padding2 = 0;
+
+ offset += frame->padding2;
+
+ /* We've reached end of stack frame. */
+ frame->stack_pointer_offset = offset;
+
+ /* Size the prologue needs to allocate. */
+ frame->to_allocate =
+ (size + frame->padding1 + frame->padding2
+ + frame->outgoing_arguments_size + frame->va_arg_size);
+
+ if ((!frame->to_allocate && frame->nregs <= 1)
+ || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+ frame->save_regs_using_mov = false;
+
+ if (TARGET_RED_ZONE && current_function_sp_is_unchanging
+ && current_function_is_leaf
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ frame->red_zone_size = frame->to_allocate;
+ if (frame->save_regs_using_mov)
+ frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
+ }
+ else
+ frame->red_zone_size = 0;
+ frame->to_allocate -= frame->red_zone_size;
+ frame->stack_pointer_offset -= frame->red_zone_size;
+#if 0
+ fprintf (stderr, "nregs: %i\n", frame->nregs);
+ fprintf (stderr, "size: %i\n", size);
+ fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
+ fprintf (stderr, "padding1: %i\n", frame->padding1);
+ fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
+ fprintf (stderr, "padding2: %i\n", frame->padding2);
+ fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
+ fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
+ fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
+ fprintf (stderr, "hard_frame_pointer_offset: %i\n",
+ frame->hard_frame_pointer_offset);
+ fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
+#endif
+}
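+
+/* An illustrative, hypothetical sketch of the alignment arithmetic used
+   above for padding1 and padding2, kept under #if 0: offsets are rounded
+   up to a power-of-two alignment with (offset + align - 1) & -align.  */
+#if 0
+#include <assert.h>
+
+/* Hypothetical helper for illustration only.  */
+static long
+round_up_to_alignment (long offset, long align)
+{
+  /* ALIGN must be a power of two; -ALIGN is then a mask of the high bits.  */
+  return (offset + align - 1) & -align;
+}
+
+static void
+round_up_examples (void)
+{
+  /* 20 bytes of locals with 16-byte stack alignment round up to 32,
+     so the padding is 12 bytes, exactly as frame->padding1 is computed.  */
+  assert (round_up_to_alignment (20, 16) == 32);
+  assert (round_up_to_alignment (32, 16) == 32);
+  assert (round_up_to_alignment (20, 16) - 20 == 12);
+}
+#endif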
+
+/* Emit code to save registers in the prologue. */
+
+static void
+ix86_emit_save_regs (void)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit code to save registers using MOV insns.  The first register
+   is saved at POINTER + OFFSET.  */
+static void
+ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
+ Pmode, offset),
+ gen_rtx_REG (Pmode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += UNITS_PER_WORD;
+ }
+}
+
+/* Expand a prologue or epilogue stack adjustment.
+   The pattern exists to put a dependency on all ebp-based memory accesses.
+   STYLE should be negative if instructions should be marked as frame related,
+   zero if the %r11 register is live and cannot be freely used, and positive
+   otherwise.  */
+
+static void
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
+{
+ rtx insn;
+
+ if (! TARGET_64BIT)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+ else if (x86_64_immediate_operand (offset, DImode))
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+ else
+ {
+ rtx r11;
+ /* r11 is used by indirect sibcall return as well, set before the
+ epilogue and used after the epilogue. ATM indirect sibcall
+ shouldn't be used together with huge frame sizes in one
+ function because of the frame_size check in sibcall.c. */
+ gcc_assert (style);
+ r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+ /* APPLE LOCAL async unwind info 5949469 */
+ if (style < 0 /* || flag_asynchronous_unwind_tables*/)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
+ offset));
+ }
+ if (style < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* APPLE LOCAL begin async unwind info 5949350 5949469 */
+#if 0
+ else if (flag_asynchronous_unwind_tables
+ && (src == hard_frame_pointer_rtx
+ || src == stack_pointer_rtx))
+ RTX_FRAME_RELATED_P (insn) = 1;
+#endif
+ /* APPLE LOCAL end async unwind info 5949350 5949469 */
+}
+
+/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
+
+static rtx
+ix86_internal_arg_pointer (void)
+{
+ bool has_force_align_arg_pointer =
+ (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
+ if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+ && DECL_NAME (current_function_decl)
+ && MAIN_NAME_P (DECL_NAME (current_function_decl))
+ && DECL_FILE_SCOPE_P (current_function_decl))
+ || ix86_force_align_arg_pointer
+ || has_force_align_arg_pointer)
+ {
+ /* Nested functions can't realign the stack due to a register
+ conflict. */
+ if (DECL_CONTEXT (current_function_decl)
+ && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
+ {
+ if (ix86_force_align_arg_pointer)
+ warning (0, "-mstackrealign ignored for nested functions");
+ if (has_force_align_arg_pointer)
+ error ("%s not supported for nested functions",
+ ix86_force_align_arg_pointer_string);
+ return virtual_incoming_args_rtx;
+ }
+ cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
+ return copy_to_reg (cfun->machine->force_align_arg_pointer);
+ }
+ else
+ return virtual_incoming_args_rtx;
+}
+
+/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
+ This is called from dwarf2out.c to emit call frame instructions
+ for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
+static void
+ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
+{
+ rtx unspec = SET_SRC (pattern);
+ gcc_assert (GET_CODE (unspec) == UNSPEC);
+
+ switch (index)
+ {
+ case UNSPEC_REG_SAVE:
+ dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
+ SET_DEST (pattern));
+ break;
+ case UNSPEC_DEF_CFA:
+ dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
+ INTVAL (XVECEXP (unspec, 0, 0)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* APPLE LOCAL begin 3399553 */
+/* Calculate the value of FLT_ROUNDS into DEST.
+
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+*/
+void
+ix86_expand_flt_rounds (rtx dest)
+{
+ rtx mem = assign_stack_temp (HImode, GET_MODE_SIZE (HImode), 0);
+ rtx temp = gen_reg_rtx (SImode);
+
+ /* Step #1: Read FPSR. Unfortunately, this can only be done into a
+ 16-bit memory location. */
+ emit_insn (gen_x86_fnstcw_1 (mem));
+
+ /* Step #2: Copy into a register. */
+ emit_insn (gen_zero_extendhisi2 (dest, mem));
+
+ /* Step #3: Perform conversion described above. */
+ emit_insn (gen_andsi3 (temp, dest, GEN_INT (0x400)));
+ emit_insn (gen_andsi3 (dest, dest, GEN_INT (0x800)));
+ emit_insn (gen_lshrsi3 (temp, temp, GEN_INT (9)));
+ emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (11)));
+ emit_insn (gen_iorsi3 (dest, dest, temp));
+ emit_insn (gen_addsi3 (dest, dest, const1_rtx));
+ emit_insn (gen_andsi3 (dest, dest, GEN_INT (3)));
+}
+/* APPLE LOCAL end 3399553 */
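+
+/* An illustrative, hypothetical sketch of the conversion formula above,
+   kept under #if 0: it maps the x87 rounding-control bits 11:10 to the
+   FLT_ROUNDS encoding.  */
+#if 0
+#include <assert.h>
+
+/* Hypothetical helper for illustration only.  */
+static int
+fpsr_to_flt_rounds (unsigned int fpsr)
+{
+  return ((((fpsr & 0x800) >> 11) | ((fpsr & 0x400) >> 9)) + 1) & 3;
+}
+
+static void
+flt_rounds_examples (void)
+{
+  assert (fpsr_to_flt_rounds (0x000) == 1);	/* 00: to nearest  -> 1 */
+  assert (fpsr_to_flt_rounds (0x400) == 3);	/* 01: toward -inf -> 3 */
+  assert (fpsr_to_flt_rounds (0x800) == 2);	/* 10: toward +inf -> 2 */
+  assert (fpsr_to_flt_rounds (0xC00) == 0);	/* 11: toward zero -> 0 */
+}
+#endif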
+
+/* APPLE LOCAL begin fix-and-continue x86 */
+#ifndef TARGET_FIX_AND_CONTINUE
+#define TARGET_FIX_AND_CONTINUE 0
+#endif
+/* APPLE LOCAL end fix-and-continue x86 */
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+ix86_expand_prologue (void)
+{
+ rtx insn;
+ bool pic_reg_used;
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+
+ /* APPLE LOCAL begin fix-and-continue x86 */
+ if (TARGET_FIX_AND_CONTINUE)
+ {
+ /* gdb on darwin arranges to forward a function from the old
+ address by modifying the first 6 instructions of the function
+ to branch to the overriding function. This is necessary to
+ permit function pointers that point to the old function to
+ actually forward to the new function. */
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ }
+ /* APPLE LOCAL end fix-and-continue x86 */
+
+ ix86_compute_frame_layout (&frame);
+
+ if (cfun->machine->force_align_arg_pointer)
+ {
+ rtx x, y;
+
+ /* Grab the argument pointer. */
+ x = plus_constant (stack_pointer_rtx, 4);
+ y = cfun->machine->force_align_arg_pointer;
+ insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+      /* The unwind info consists of two parts: install the fafp as the cfa,
+	 and record the fafp as the "save register" of the stack pointer.
+	 The latter is there so that the unwinder can see where it
+	 should restore the stack pointer across the and insn.  */
+ x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
+ x = gen_rtx_SET (VOIDmode, y, x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
+ UNSPEC_REG_SAVE);
+ y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
+ RTX_FRAME_RELATED_P (y) = 1;
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
+ x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
+ REG_NOTES (insn) = x;
+
+ /* Align the stack. */
+ emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-16)));
+
+      /* And here we cheat like madmen with the unwind info.  We force the
+	 cfa register back to sp+4, which is exactly what it was at the
+	 start of the function.  Re-pushing the return address leaves it at
+	 the same spot relative to the cfa, so the unwind info stays
+	 correct.  */
+ x = cfun->machine->force_align_arg_pointer;
+ x = gen_frame_mem (Pmode, plus_constant (x, -4));
+ insn = emit_insn (gen_push (x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ x = GEN_INT (4);
+ x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
+ REG_NOTES (insn) = x;
+ }
+
+ /* Note: AT&T enter does NOT have reversed args. Enter is probably
+ slower on all targets. Also sdb doesn't like it. */
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ allocate = frame.to_allocate;
+
+ if (!frame.save_regs_using_mov)
+ ix86_emit_save_regs ();
+ else
+ allocate += frame.nregs * UNITS_PER_WORD;
+
+  /* When using the red zone, we may start saving registers before
+     allocating the stack frame, saving one cycle of the prologue.  */
+ if (TARGET_RED_ZONE && frame.save_regs_using_mov)
+ ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
+ : stack_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+
+ if (allocate == 0)
+ ;
+ else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
+ /* APPLE LOCAL begin CW asm blocks */
+ {
+ if (! cfun->iasm_asm_function)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate), -1);
+ }
+ /* APPLE LOCAL end CW asm blocks */
+ else
+ {
+ /* Only valid for Win32. */
+ rtx eax = gen_rtx_REG (SImode, 0);
+ bool eax_live = ix86_eax_live_at_start_p ();
+ rtx t;
+
+ gcc_assert (!TARGET_64BIT);
+
+ if (eax_live)
+ {
+ emit_insn (gen_push (eax));
+ allocate -= 4;
+ }
+
+ emit_move_insn (eax, GEN_INT (allocate));
+
+ insn = emit_insn (gen_allocate_stack_worker (eax));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ t, REG_NOTES (insn));
+
+ if (eax_live)
+ {
+ if (frame_pointer_needed)
+ t = plus_constant (hard_frame_pointer_rtx,
+ allocate
+ - frame.to_allocate
+ - frame.nregs * UNITS_PER_WORD);
+ else
+ t = plus_constant (stack_pointer_rtx, allocate);
+ emit_move_insn (eax, gen_rtx_MEM (SImode, t));
+ }
+ }
+
+ if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
+ {
+ if (!frame_pointer_needed || !frame.to_allocate)
+ ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+ else
+ ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+ }
+
+ pic_reg_used = false;
+ /* APPLE LOCAL begin 5695218 */
+ if (pic_offset_table_rtx && regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
+ && !TARGET_64BIT)
+ {
+ unsigned int alt_pic_reg_used;
+
+ alt_pic_reg_used = ix86_select_alt_pic_regnum ();
+ /* APPLE LOCAL end 5695218 */
+
+ if (alt_pic_reg_used != INVALID_REGNUM)
+ /* APPLE LOCAL begin 5695218 */
+ /* REGNO (pic_offset_table_rtx) = alt_pic_reg_used; */
+ ix86_globally_replace_pic_reg (alt_pic_reg_used);
+ /* APPLE LOCAL end 5695218 */
+
+ pic_reg_used = true;
+ }
+
+ if (pic_reg_used)
+ {
+ if (TARGET_64BIT)
+ insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+ else
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+
+      /* Even with accurate pre-reload life analysis, we can wind up
+	 deleting all references to the pic register after reload.
+	 Consider the case where cross-jumping unifies two sides of a
+	 branch controlled by a comparison vs the only read from a global.
+	 In that case, allow the set_got to be deleted, though we're too
+	 late to do anything about the ebx save in the prologue.  */
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
+ }
+
+  /* Prevent function calls from being scheduled before the call to mcount.
+     In the pic_reg_used case, make sure that the got load isn't deleted.  */
+ if (current_function_profile)
+ emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
+}
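+
+/* An illustrative, hypothetical sketch of the -mstackrealign step above,
+   kept under #if 0: the incoming stack pointer is rounded down to a
+   16-byte boundary (the "and $-16" emitted above), while the saved
+   argument pointer still records the original sp+4 so the unwind info can
+   recover the caller's frame.  */
+#if 0
+#include <assert.h>
+
+/* Hypothetical helper for illustration only.  */
+static unsigned long
+align_stack_down (unsigned long sp)
+{
+  return sp & (unsigned long) -16;
+}
+
+static void
+align_stack_examples (void)
+{
+  assert (align_stack_down (0xbffff3ac) == 0xbffff3a0);
+  assert (align_stack_down (0xbffff3a0) == 0xbffff3a0);
+}
+#endif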
+
+/* Emit code to restore saved registers using MOV insns. First register
+ is restored from POINTER + OFFSET. */
+static void
+ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+ int maybe_eh_return)
+{
+ int regno;
+ rtx base_address = gen_rtx_MEM (Pmode, pointer);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, maybe_eh_return))
+ {
+ /* Ensure that adjust_address won't be forced to produce pointer
+ out of range allowed by x86-64 instruction set. */
+ if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
+ {
+ rtx r11;
+
+ r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ emit_move_insn (r11, GEN_INT (offset));
+ emit_insn (gen_adddi3 (r11, r11, pointer));
+ base_address = gen_rtx_MEM (Pmode, r11);
+ offset = 0;
+ }
+ emit_move_insn (gen_rtx_REG (Pmode, regno),
+ adjust_address (base_address, Pmode, offset));
+ offset += UNITS_PER_WORD;
+ }
+}
+
+/* Restore function stack, frame, and registers. */
+
+void
+ix86_expand_epilogue (int style)
+{
+ int regno;
+ int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
+ struct ix86_frame frame;
+ HOST_WIDE_INT offset;
+
+ ix86_compute_frame_layout (&frame);
+
+ /* Calculate start of saved registers relative to ebp. Special care
+ must be taken for the normal return case of a function using
+ eh_return: the eax and edx registers are marked as saved, but not
+ restored along this path. */
+ offset = frame.nregs;
+ if (current_function_calls_eh_return && style != 2)
+ offset -= 2;
+ offset *= -UNITS_PER_WORD;
+
+ /* APPLE LOCAL begin CW asm blocks */
+ /* For an asm function, don't generate an epilogue. */
+ if (cfun->iasm_asm_function)
+ {
+ emit_jump_insn (gen_return_internal ());
+ return;
+ }
+ /* APPLE LOCAL end CW asm blocks */
+
+  /* If we're only restoring one register and sp is not valid, then
+     use a move instruction to restore the register, since it's
+     less work than reloading sp and popping the register.
+
+     The default code results in a stack adjustment using an add/lea
+     instruction, while this code results in a LEAVE instruction (or its
+     discrete equivalent), so it is profitable in some other cases as well,
+     especially when there are no registers to restore.  We also use this
+     code when TARGET_USE_LEAVE is set and there is exactly one register to
+     pop.  This heuristic may need some tuning in the future.  */
+ if ((!sp_valid && frame.nregs <= 1)
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1 || frame.to_allocate))
+ || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
+ || (frame_pointer_needed && TARGET_USE_LEAVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && frame.nregs == 1)
+ || current_function_calls_eh_return)
+ {
+      /* Restore registers.  We can use ebp or esp to address the memory
+	 locations.  If both are available, default to ebp, since offsets
+	 are known to be small.  The only exception is esp pointing directly
+	 to the end of the block of saved registers, where we may simplify
+	 the addressing mode.  */
+
+ if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
+ ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
+ else
+ ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
+ offset, style == 2);
+
+ /* eh_return epilogues need %ecx added to the stack pointer. */
+ if (style == 2)
+ {
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
+
+ if (frame_pointer_needed)
+ {
+ tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
+ tmp = plus_constant (tmp, UNITS_PER_WORD);
+ emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
+
+ tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
+ emit_move_insn (hard_frame_pointer_rtx, tmp);
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
+ const0_rtx, style);
+ }
+ else
+ {
+ tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
+ tmp = plus_constant (tmp, (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD));
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+ }
+ }
+ else if (!frame_pointer_needed)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD),
+ style);
+ /* If not an i386, mov & pop is faster than "leave". */
+ else if (TARGET_USE_LEAVE || optimize_size
+ || !cfun->machine->use_fast_prologue_epilogue)
+ emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
+ else
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ const0_rtx, style);
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ else
+ emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ }
+ }
+ else
+ {
+ /* First step is to deallocate the stack frame so that we can
+ pop the registers. */
+ if (!sp_valid)
+ {
+ gcc_assert (frame_pointer_needed);
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ GEN_INT (offset), style);
+ }
+ else if (frame.to_allocate)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate), style);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, false))
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
+ else
+ emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
+ }
+ if (frame_pointer_needed)
+ {
+ /* Leave results in shorter dependency chains on CPUs that are
+ able to grok it fast. */
+ if (TARGET_USE_LEAVE)
+ emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
+ else if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ else
+ emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ }
+ }
+
+ if (cfun->machine->force_align_arg_pointer)
+ {
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ cfun->machine->force_align_arg_pointer,
+ GEN_INT (-4)));
+ }
+
+ /* Sibcall epilogues don't want a return instruction. */
+ if (style == 0)
+ return;
+
+ if (current_function_pops_args && current_function_args_size)
+ {
+ rtx popc = GEN_INT (current_function_pops_args);
+
+      /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
+	 return address, do an explicit add, and jump indirectly to the
+	 caller.  */
+
+ if (current_function_pops_args >= 65536)
+ {
+ rtx ecx = gen_rtx_REG (SImode, 2);
+
+ /* There is no "pascal" calling convention in 64bit ABI. */
+ gcc_assert (!TARGET_64BIT);
+
+ emit_insn (gen_popsi1 (ecx));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
+ emit_jump_insn (gen_return_indirect_internal (ecx));
+ }
+ else
+ emit_jump_insn (gen_return_pop_internal (popc));
+ }
+ else
+ emit_jump_insn (gen_return_internal ());
+}
+
+/* Reset from the function's potential modifications. */
+
+static void
+ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (pic_offset_table_rtx)
+ REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
+#if TARGET_MACHO
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
+}
+
+/* Extract the parts of an RTL expression that is a valid memory address
+ for an instruction. Return 0 if the structure of the address is
+   grossly off.  Return -1 if the address contains ASHIFT, so it is not
+   strictly valid but is still used for computing the length of a lea
+   instruction.  */
+
+int
+ix86_decompose_address (rtx addr, struct ix86_address *out)
+{
+ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
+ rtx base_reg, index_reg;
+ HOST_WIDE_INT scale = 1;
+ rtx scale_rtx = NULL_RTX;
+ int retval = 1;
+ enum ix86_address_seg seg = SEG_DEFAULT;
+
+ if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
+ base = addr;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx addends[4], op;
+ int n = 0, i;
+
+ op = addr;
+ do
+ {
+ if (n >= 4)
+ return 0;
+ addends[n++] = XEXP (op, 1);
+ op = XEXP (op, 0);
+ }
+ while (GET_CODE (op) == PLUS);
+ if (n >= 4)
+ return 0;
+ addends[n] = op;
+
+ for (i = n; i >= 0; --i)
+ {
+ op = addends[i];
+ switch (GET_CODE (op))
+ {
+ case MULT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ scale_rtx = XEXP (op, 1);
+ break;
+
+ case UNSPEC:
+ if (XINT (op, 1) == UNSPEC_TP
+ && TARGET_TLS_DIRECT_SEG_REFS
+ && seg == SEG_DEFAULT)
+ seg = TARGET_64BIT ? SEG_FS : SEG_GS;
+ else
+ return 0;
+ break;
+
+ case REG:
+ case SUBREG:
+ if (!base)
+ base = op;
+ else if (!index)
+ index = op;
+ else
+ return 0;
+ break;
+
+ case CONST:
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (disp)
+ return 0;
+ disp = op;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (GET_CODE (addr) == MULT)
+ {
+ index = XEXP (addr, 0); /* index*scale */
+ scale_rtx = XEXP (addr, 1);
+ }
+ else if (GET_CODE (addr) == ASHIFT)
+ {
+ rtx tmp;
+
+ /* We're called for lea too, which implements ashift on occasion. */
+ index = XEXP (addr, 0);
+ tmp = XEXP (addr, 1);
+ if (GET_CODE (tmp) != CONST_INT)
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ retval = -1;
+ }
+ else
+ disp = addr; /* displacement */
+
+ /* Extract the integral value of scale. */
+ if (scale_rtx)
+ {
+ if (GET_CODE (scale_rtx) != CONST_INT)
+ return 0;
+ scale = INTVAL (scale_rtx);
+ }
+
+ base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
+ index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
+
+  /* Allow the arg pointer and stack pointer as index if there is no scaling. */
+ if (base_reg && index_reg && scale == 1
+ && (index_reg == arg_pointer_rtx
+ || index_reg == frame_pointer_rtx
+ || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
+ {
+ rtx tmp;
+ tmp = base, base = index, index = tmp;
+ tmp = base_reg, base_reg = index_reg, index_reg = tmp;
+ }
+
+ /* Special case: %ebp cannot be encoded as a base without a displacement. */
+ if ((base_reg == hard_frame_pointer_rtx
+ || base_reg == frame_pointer_rtx
+ || base_reg == arg_pointer_rtx) && !disp)
+ disp = const0_rtx;
+
+ /* Special case: on K6, [%esi] makes the instruction vector decoded.
+ Avoid this by transforming to [%esi+0]. */
+ if (ix86_tune == PROCESSOR_K6 && !optimize_size
+ && base_reg && !index_reg && !disp
+ && REG_P (base_reg)
+ && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
+ disp = const0_rtx;
+
+ /* Special case: encode reg+reg instead of reg*2. */
+ if (!base && index && scale && scale == 2)
+ base = index, base_reg = index_reg, scale = 1;
+
+ /* Special case: scaling cannot be encoded without base or displacement. */
+ if (!base && !disp && index && scale != 1)
+ disp = const0_rtx;
+
+ out->base = base;
+ out->index = index;
+ out->disp = disp;
+ out->scale = scale;
+ out->seg = seg;
+
+ return retval;
+}
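+
+/* An illustrative, hypothetical sketch of the base/index/scale/disp
+   normalization rules applied above, kept under #if 0: reg*2 with no base
+   becomes reg+reg, and a scaled index without base or displacement gets an
+   explicit zero displacement.  The struct and helper are made up for
+   illustration; the real code works on rtx.  */
+#if 0
+#include <stdbool.h>
+
+struct addr_parts
+{
+  int base;		/* register number, or -1 if absent */
+  int index;		/* register number, or -1 if absent */
+  int scale;		/* 1, 2, 4 or 8 */
+  bool has_disp;	/* displacement present?  */
+};
+
+/* Hypothetical helper for illustration only.  */
+static void
+normalize_addr (struct addr_parts *p)
+{
+  /* Encode reg+reg instead of reg*2, as above.  */
+  if (p->base < 0 && p->index >= 0 && p->scale == 2)
+    {
+      p->base = p->index;
+      p->scale = 1;
+    }
+  /* Scaling cannot be encoded without a base or a displacement.  */
+  if (p->base < 0 && !p->has_disp && p->index >= 0 && p->scale != 1)
+    p->has_disp = true;		/* stands in for disp = const0_rtx */
+}
+#endif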
+
+/* Return cost of the memory address x.
+ For i386, it is better to use a complex address than let gcc copy
+ the address into a reg and make a new pseudo. But not if the address
+   requires two regs - that would mean more pseudos with longer
+ lifetimes. */
+static int
+ix86_address_cost (rtx x)
+{
+ struct ix86_address parts;
+ int cost = 1;
+ int ok = ix86_decompose_address (x, &parts);
+
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ /* More complex memory references are better. */
+ if (parts.disp && parts.disp != const0_rtx)
+ cost--;
+ if (parts.seg != SEG_DEFAULT)
+ cost--;
+
+ /* Attempt to minimize number of registers in the address. */
+ if ((parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
+ || (parts.index
+ && (!REG_P (parts.index)
+ || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
+ cost++;
+
+ if (parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
+ && parts.index
+ && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
+ && parts.base != parts.index)
+ cost++;
+
+  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
+     since its predecode logic can't detect the length of instructions
+     and it degenerates to vector decoding.  Increase the cost of such
+     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
+     to split such addresses or even refuse them entirely.
+
+     The following addressing modes are affected:
+      [base+scale*index]
+      [scale*index+disp]
+      [base+index]
+
+     The first and last cases may be avoidable by explicitly coding the zero
+     into the memory address, but I don't have an AMD-K6 machine handy to
+     check this theory.  */
+
+ if (TARGET_K6
+ && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
+ || (parts.disp && !parts.base && parts.index && parts.scale != 1)
+ || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
+ cost += 10;
+
+ return cost;
+}
+
+/* If X is a machine specific address (i.e. a symbol or label being
+ referenced as a displacement from the GOT implemented using an
+ UNSPEC), then return the base term. Otherwise return X. */
+
+rtx
+ix86_find_base_term (rtx x)
+{
+ rtx term;
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST)
+ return x;
+ term = XEXP (x, 0);
+ if (GET_CODE (term) == PLUS
+ && (GET_CODE (XEXP (term, 1)) == CONST_INT
+ || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
+ term = XEXP (term, 0);
+ if (GET_CODE (term) != UNSPEC
+ || XINT (term, 1) != UNSPEC_GOTPCREL)
+ return x;
+
+ term = XVECEXP (term, 0, 0);
+
+ if (GET_CODE (term) != SYMBOL_REF
+ && GET_CODE (term) != LABEL_REF)
+ return x;
+
+ return term;
+ }
+
+ term = ix86_delegitimize_address (x);
+
+ if (GET_CODE (term) != SYMBOL_REF
+ && GET_CODE (term) != LABEL_REF)
+ return x;
+
+ return term;
+}
+
+/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
+   this is used to form addresses of local data when -fPIC is in
+   use.  */
+
+static bool
+darwin_local_data_pic (rtx disp)
+{
+ if (GET_CODE (disp) == MINUS)
+ {
+ if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
+ || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
+ if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
+ {
+ const char *sym_name = XSTR (XEXP (disp, 1), 0);
+ if (! strcmp (sym_name, "<pic base>"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Determine if a given RTX is a valid constant. We already know this
+ satisfies CONSTANT_P. */
+
+bool
+legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return false;
+ x = XEXP (x, 0);
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (x))
+ return true;
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ case UNSPEC_NTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ default:
+ return false;
+ }
+
+ /* We must have drilled down to a symbol. */
+ if (GET_CODE (x) == LABEL_REF)
+ return true;
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS symbols are never valid. */
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return false;
+ /* APPLE LOCAL begin dynamic-no-pic */
+#if TARGET_MACHO
+ if (TARGET_MACHO && MACHO_DYNAMIC_NO_PIC_P)
+ return machopic_symbol_defined_p (x);
+#endif
+ break;
+
+ case PLUS:
+ {
+ rtx left = XEXP (x, 0);
+ rtx right = XEXP (x, 1);
+ bool left_is_constant = legitimate_constant_p (left);
+ bool right_is_constant = legitimate_constant_p (right);
+ return left_is_constant && right_is_constant;
+ }
+ break;
+ /* APPLE LOCAL end dynamic-no-pic */
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == TImode
+ && x != CONST0_RTX (TImode)
+ && !TARGET_64BIT)
+ return false;
+ break;
+
+ case CONST_VECTOR:
+ /* APPLE LOCAL begin radar 4874197 mainline candidate */
+ if (standard_sse_constant_p (x))
+ /* APPLE LOCAL end radar 4874197 mainline candidate */
+ return true;
+ return false;
+
+ default:
+ break;
+ }
+
+ /* Otherwise we handle everything else in the move patterns. */
+ return true;
+}
+
+/* Determine if it's legal to put X into the constant pool. This
+ is not possible for the address of thread-local symbols, which
+ is checked above. */
+
+static bool
+ix86_cannot_force_const_mem (rtx x)
+{
+ /* We can always put integral constants and vectors in memory. */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ return false;
+
+ default:
+ break;
+ }
+ return !legitimate_constant_p (x);
+}
+
+/* Determine if a given RTX is a valid constant address. */
+
+bool
+constant_address_p (rtx x)
+{
+ return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
+}
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+bool
+legitimate_pic_operand_p (rtx x)
+{
+ rtx inner;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ inner = XEXP (x, 0);
+ if (GET_CODE (inner) == PLUS
+ && GET_CODE (XEXP (inner, 1)) == CONST_INT)
+ inner = XEXP (inner, 0);
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (inner) == UNSPEC)
+ switch (XINT (inner, 1))
+ {
+ case UNSPEC_GOTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ x = XVECEXP (inner, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ default:
+ return false;
+ }
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return legitimate_pic_address_disp_p (x);
+
+ default:
+ return true;
+ }
+}
+
+/* Determine if a given CONST RTX is a valid memory displacement
+ in PIC mode. */
+
+int
+legitimate_pic_address_disp_p (rtx disp)
+{
+ bool saw_plus;
+
+ /* In 64bit mode we can allow direct addresses of symbols and labels
+ when they are not dynamic symbols. */
+ if (TARGET_64BIT)
+ {
+ rtx op0 = disp, op1;
+
+ switch (GET_CODE (disp))
+ {
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ if (GET_CODE (XEXP (disp, 0)) != PLUS)
+ break;
+ op0 = XEXP (XEXP (disp, 0), 0);
+ op1 = XEXP (XEXP (disp, 0), 1);
+ if (GET_CODE (op1) != CONST_INT
+ || INTVAL (op1) >= 16*1024*1024
+ || INTVAL (op1) < -16*1024*1024)
+ break;
+ if (GET_CODE (op0) == LABEL_REF)
+ return true;
+ if (GET_CODE (op0) != SYMBOL_REF)
+ break;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS references should always be enclosed in UNSPEC. */
+ if (SYMBOL_REF_TLS_MODEL (op0))
+ return false;
+ /* APPLE LOCAL begin fix-and-continue 6227434 */
+#if TARGET_MACHO
+ if (machopic_data_defined_p (op0))
+ return true;
+
+ /* Under -mfix-and-continue, even local storage is
+ addressed via the GOT, so that the value of local
+ statics is preserved when a function is "fixed." */
+ if (indirect_data (op0))
+ return false;
+#endif
+ /* APPLE LOCAL end fix-and-continue 6227434 */
+ if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
+ return true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (GET_CODE (disp) != CONST)
+ return 0;
+ disp = XEXP (disp, 0);
+
+ if (TARGET_64BIT)
+ {
+      /* It is unsafe to allow PLUS expressions; this limits the allowed
+	 distance of GOT references.  We should not need these anyway.  */
+ if (GET_CODE (disp) != UNSPEC
+ || (XINT (disp, 1) != UNSPEC_GOTPCREL
+ && XINT (disp, 1) != UNSPEC_GOTOFF))
+ return 0;
+
+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
+ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
+ return 0;
+ return 1;
+ }
+
+ saw_plus = false;
+ if (GET_CODE (disp) == PLUS)
+ {
+ if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
+ return 0;
+ disp = XEXP (disp, 0);
+ saw_plus = true;
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (disp))
+ return 1;
+
+ if (GET_CODE (disp) != UNSPEC)
+ return 0;
+
+ switch (XINT (disp, 1))
+ {
+ case UNSPEC_GOT:
+ if (saw_plus)
+ return false;
+ return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
+ case UNSPEC_GOTOFF:
+      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
+	 While the ABI also specifies a 32bit relocation, we don't produce
+	 it in the small PIC model at all.  */
+ if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
+ && !TARGET_64BIT)
+ return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
+ return false;
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ if (saw_plus)
+ return false;
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
+ case UNSPEC_NTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ }
+
+ return 0;
+}
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
+ memory address for an instruction. The MODE argument is the machine mode
+ for the MEM expression that wants to use this address.
+
+   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
+ convert common non-canonical forms to canonical form so that they will
+ be recognized. */
+
+int
+legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+ const char *reason = NULL;
+ rtx reason_rtx = NULL_RTX;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
+ GET_MODE_NAME (mode), strict);
+ debug_rtx (addr);
+ }
+
+ if (ix86_decompose_address (addr, &parts) <= 0)
+ {
+ reason = "decomposition failed";
+ goto report_error;
+ }
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ /* Validate base register.
+
+ Don't allow SUBREG's that span more than a word here. It can lead to spill
+ failures when the base is one word out of a two word structure, which is
+ represented internally as a DImode int. */
+
+ if (base)
+ {
+ rtx reg;
+ reason_rtx = base;
+
+ if (REG_P (base))
+ reg = base;
+ else if (GET_CODE (base) == SUBREG
+ && REG_P (SUBREG_REG (base))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (base);
+ else
+ {
+ reason = "base is not a register";
+ goto report_error;
+ }
+
+ if (GET_MODE (base) != Pmode)
+ {
+ reason = "base is not in Pmode";
+ goto report_error;
+ }
+
+ if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
+ {
+ reason = "base is not valid";
+ goto report_error;
+ }
+ }
+
+ /* Validate index register.
+
+ Don't allow SUBREG's that span more than a word here -- same as above. */
+
+ if (index)
+ {
+ rtx reg;
+ reason_rtx = index;
+
+ if (REG_P (index))
+ reg = index;
+ else if (GET_CODE (index) == SUBREG
+ && REG_P (SUBREG_REG (index))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (index);
+ else
+ {
+ reason = "index is not a register";
+ goto report_error;
+ }
+
+ if (GET_MODE (index) != Pmode)
+ {
+ reason = "index is not in Pmode";
+ goto report_error;
+ }
+
+ if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
+ {
+ reason = "index is not valid";
+ goto report_error;
+ }
+ }
+
+ /* Validate scale factor. */
+ if (scale != 1)
+ {
+ reason_rtx = GEN_INT (scale);
+ if (!index)
+ {
+ reason = "scale without index";
+ goto report_error;
+ }
+
+ if (scale != 2 && scale != 4 && scale != 8)
+ {
+ reason = "scale is not a valid multiplier";
+ goto report_error;
+ }
+ }
+
+ /* Validate displacement. */
+ if (disp)
+ {
+ reason_rtx = disp;
+
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC)
+ switch (XINT (XEXP (disp, 0), 1))
+ {
+	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
+	     used.  While the ABI also specifies 32bit relocations, we don't
+	     produce them at all and use IP-relative addressing instead.  */
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ gcc_assert (flag_pic);
+ if (!TARGET_64BIT)
+ goto is_legitimate_pic;
+ reason = "64bit address unspec";
+ goto report_error;
+
+ case UNSPEC_GOTPCREL:
+ gcc_assert (flag_pic);
+ goto is_legitimate_pic;
+
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ case UNSPEC_NTPOFF:
+ case UNSPEC_DTPOFF:
+ break;
+
+ default:
+ reason = "invalid address unspec";
+ goto report_error;
+ }
+
+ else if (SYMBOLIC_CONST (disp)
+ && (flag_pic
+ || (TARGET_MACHO
+#if TARGET_MACHO
+ && MACHOPIC_INDIRECT
+ && !machopic_operand_p (disp)
+#endif
+ )))
+ {
+
+ is_legitimate_pic:
+ if (TARGET_64BIT && (index || base))
+ {
+ /* foo@dtpoff(%rX) is ok. */
+ if (GET_CODE (disp) != CONST
+ || GET_CODE (XEXP (disp, 0)) != PLUS
+ || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
+ || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
+ || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ {
+ reason = "non-constant pic memory reference";
+ goto report_error;
+ }
+ }
+ /* APPLE LOCAL begin dynamic-no-pic */
+ else if (flag_pic && ! legitimate_pic_address_disp_p (disp))
+ {
+ reason = "displacement is an invalid pic construct";
+ goto report_error;
+ }
+#if TARGET_MACHO
+ else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
+ {
+	    reason = "displacement must be referenced via non_lazy_pointer";
+ goto report_error;
+ }
+#endif
+ /* APPLE LOCAL end dynamic-no-pic */
+
+ /* This code used to verify that a symbolic pic displacement
+ includes the pic_offset_table_rtx register.
+
+	     While this is a good idea, unfortunately these constructs may
+	     be created by the "adds using lea" optimization for incorrect
+	     code like:
+
+	     int a;
+	     int foo(int i)
+	       {
+	         return *(&a+i);
+	       }
+
+	     This code is nonsensical, but results in addressing the
+	     GOT table with a pic_offset_table_rtx base.  We can't
+	     just refuse it easily, since it gets matched by the
+	     "addsi3" pattern, which later gets split to lea when the
+	     output register differs from the input.  While this
+	     could be handled by a separate addsi pattern for this case
+	     that never results in lea, disabling this test seems to be
+	     the easier and correct fix for the crash.  */
+ }
+ else if (GET_CODE (disp) != LABEL_REF
+ && GET_CODE (disp) != CONST_INT
+ && (GET_CODE (disp) != CONST
+ || !legitimate_constant_p (disp))
+ && (GET_CODE (disp) != SYMBOL_REF
+ || !legitimate_constant_p (disp)))
+ {
+ reason = "displacement is not constant";
+ goto report_error;
+ }
+ else if (TARGET_64BIT
+ && !x86_64_immediate_operand (disp, VOIDmode))
+ {
+ reason = "displacement is out of range";
+ goto report_error;
+ }
+ }
+
+ /* Everything looks valid. */
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr, "Success.\n");
+ return TRUE;
+
+ report_error:
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "Error: %s\n", reason);
+ debug_rtx (reason_rtx);
+ }
+ return FALSE;
+}
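+
+/* An illustrative, hypothetical sketch of the scale validation above,
+   kept under #if 0: a scale factor is only legitimate when an index
+   register is present and the factor is 1, 2, 4 or 8.  */
+#if 0
+#include <stdbool.h>
+
+/* Hypothetical helper for illustration only.  */
+static bool
+scale_ok_p (bool have_index, int scale)
+{
+  if (scale == 1)
+    return true;		/* no scaling, always fine */
+  if (!have_index)
+    return false;		/* "scale without index" */
+  return scale == 2 || scale == 4 || scale == 8;
+}
+#endif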
+
+/* Return a unique alias set for the GOT. */
+
+static HOST_WIDE_INT
+ix86_GOT_alias_set (void)
+{
+ static HOST_WIDE_INT set = -1;
+ if (set == -1)
+ set = new_alias_set ();
+ return set;
+}
+
+/* Return a legitimate reference for ORIG (an address) using the
+ register REG. If REG is 0, a new pseudo is generated.
+
+ There are two types of references that must be handled:
+
+ 1. Global data references must load the address from the GOT, via
+ the PIC reg. An insn is emitted to do this load, and the reg is
+ returned.
+
+ 2. Static data references, constant pool addresses, and code labels
+ compute the address as an offset from the GOT, whose base is in
+ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
+ differentiate them from global data objects. The returned
+ address is the PIC reg + an unspec constant.
+
+ GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
+ reg also appears in the address. */
+
+static rtx
+legitimize_pic_address (rtx orig, rtx reg)
+{
+ rtx addr = orig;
+ rtx new = orig;
+ rtx base;
+
+#if TARGET_MACHO
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ /* Use the generic Mach-O PIC machinery. */
+ return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
+ }
+#endif
+
+ if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
+ new = addr;
+ else if (TARGET_64BIT
+ && ix86_cmodel != CM_SMALL_PIC
+ && local_symbolic_operand (addr, Pmode))
+ {
+ rtx tmpreg;
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
+ }
+ else
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new = gen_rtx_CONST (Pmode, new);
+ if (!reg)
+ tmpreg = gen_reg_rtx (Pmode);
+ else
+ tmpreg = reg;
+ emit_move_insn (tmpreg, new);
+
+ if (reg != 0)
+ {
+ new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
+ tmpreg, 1, OPTAB_DIRECT);
+ new = reg;
+ }
+      else
+	new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
+ }
+ else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
+ {
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
+ }
+ else
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
+ {
+ if (TARGET_64BIT)
+ {
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_const_mem (Pmode, new);
+ set_mem_alias_set (new, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+	  /* Use gen_movsi directly; otherwise the address is loaded
+	     into a register for CSE.  We don't want to CSE these addresses;
+	     instead we CSE addresses from the GOT table, so skip this.  */
+ emit_insn (gen_movsi (reg, new));
+ new = reg;
+ }
+ else
+ {
+ /* This symbol must be referenced via a load from the
+ Global Offset Table (@GOT). */
+
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+ new = gen_const_mem (Pmode, new);
+ set_mem_alias_set (new, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else
+ {
+ if (GET_CODE (addr) == CONST_INT
+ && !x86_64_immediate_operand (addr, VOIDmode))
+ {
+ if (reg)
+ {
+ emit_move_insn (reg, addr);
+ new = reg;
+ }
+ else
+ new = force_reg (Pmode, addr);
+ }
+ else if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+
+ /* We must match stuff we generate before. Assume the only
+ unspecs that can get here are ours. Not that we could do
+ anything with them anyway.... */
+ if (GET_CODE (addr) == UNSPEC
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == UNSPEC))
+ return orig;
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ /* Check first to see if this is a constant offset from a @GOTOFF
+ symbol reference. */
+ if (local_symbolic_operand (op0, Pmode)
+ && GET_CODE (op1) == CONST_INT)
+ {
+ if (!TARGET_64BIT)
+ {
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
+ UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, op1);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else
+ {
+ if (INTVAL (op1) < -16*1024*1024
+ || INTVAL (op1) >= 16*1024*1024)
+ {
+ if (!x86_64_immediate_operand (op1, Pmode))
+ op1 = force_reg (Pmode, op1);
+ new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
+ }
+ }
+ }
+ else
+ {
+ base = legitimize_pic_address (XEXP (addr, 0), reg);
+ new = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg);
+
+ if (GET_CODE (new) == CONST_INT)
+ new = plus_constant (base, INTVAL (new));
+ else
+ {
+ if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
+ new = XEXP (new, 1);
+ }
+ new = gen_rtx_PLUS (Pmode, base, new);
+ /* APPLE LOCAL begin fix-and-continue 6358507 */
+ if (!legitimate_address_p (Pmode, new, FALSE))
+ new = force_reg (Pmode, new);
+ /* APPLE LOCAL end fix-and-continue 6358507 */
+ }
+ }
+ }
+ }
+ return new;
+}
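+
+/* An illustrative, hypothetical sketch of the two PIC reference shapes
+   produced above, kept under #if 0: local symbols are addressed as an
+   offset from the PIC base (@GOTOFF), while global symbols are loaded
+   indirectly from their GOT slot (@GOT).  The strings merely describe the
+   RTL shapes built above.  */
+#if 0
+/* Hypothetical helper for illustration only.  */
+static const char *
+pic_reference_shape (int symbol_is_local)
+{
+  return symbol_is_local
+	 ? "pic_reg + const (unspec [sym] GOTOFF)"	/* address computed */
+	 : "mem (pic_reg + const (unspec [sym] GOT))";	/* address loaded */
+}
+#endif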
+
+/* Load the thread pointer. If TO_REG is true, force it into a register. */
+
+static rtx
+get_thread_pointer (int to_reg)
+{
+ rtx tp, reg, insn;
+
+ tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+ if (!to_reg)
+ return tp;
+
+ reg = gen_reg_rtx (Pmode);
+ insn = gen_rtx_SET (VOIDmode, reg, tp);
+ insn = emit_insn (insn);
+
+ return reg;
+}
+
+/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
+ false if we expect this to be used for a memory address and true if
+ we expect to load the address into a register. */
+
+static rtx
+legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
+{
+ rtx dest, base, off, pic, tp;
+ int type;
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ dest = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, 0), insns;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_libcall_block (insns, dest, rax, x);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_global_dynamic_64 (dest, x));
+ else
+ emit_insn (gen_tls_global_dynamic_32 (dest, x));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ base = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
+ insns = get_insns ();
+ end_sequence ();
+
+ note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
+ note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
+ emit_libcall_block (insns, base, rax, note);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_local_dynamic_base_64 (base));
+ else
+ emit_insn (gen_tls_local_dynamic_base_32 (base));
+
+ if (TARGET_GNU2_TLS)
+ {
+ rtx x = ix86_tls_module_base ();
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV,
+ gen_rtx_MINUS (Pmode, x, tp));
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ if (TARGET_64BIT)
+ {
+ pic = NULL;
+ type = UNSPEC_GOTNTPOFF;
+ }
+ else if (flag_pic)
+ {
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ pic = pic_offset_table_rtx;
+ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+ }
+ else if (!TARGET_ANY_GNU_TLS)
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ type = UNSPEC_GOTTPOFF;
+ }
+ else
+ {
+ pic = NULL;
+ type = UNSPEC_INDNTPOFF;
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
+ off = gen_rtx_CONST (Pmode, off);
+ if (pic)
+ off = gen_rtx_PLUS (Pmode, pic, off);
+ off = gen_const_mem (Pmode, off);
+ set_mem_alias_set (off, ix86_GOT_alias_set ());
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ off = force_reg (Pmode, off);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
+ (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the 80386, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in a general reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in a general reg.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address in i386.c for details. */
+
+rtx
+legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
+{
+ int changed = 0;
+ unsigned log;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
+ GET_MODE_NAME (mode));
+ debug_rtx (x);
+ }
+
+ log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
+ if (log)
+ return legitimize_tls_address (x, log, false);
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
+ {
+ rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
+ }
+
+ if (flag_pic && SYMBOLIC_CONST (x))
+ return legitimize_pic_address (x, 0);
+ /* APPLE LOCAL begin dynamic-no-pic */
+#if TARGET_MACHO
+ if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
+ return machopic_indirect_data_reference (x, 0);
+#endif
+ /* APPLE LOCAL end dynamic-no-pic */
+
+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
+ if (GET_CODE (x) == ASHIFT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (x, 1));
+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (x) == PLUS)
+ {
+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
+
+ if (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 0), 1));
+ XEXP (x, 0) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == ASHIFT
+ && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 1), 1));
+ XEXP (x, 1) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
+ GEN_INT (1 << log));
+ }
+
+ /* Put multiply first if it isn't already. */
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ rtx tmp = XEXP (x, 0);
+ XEXP (x, 0) = XEXP (x, 1);
+ XEXP (x, 1) = tmp;
+ changed = 1;
+ }
+
+ /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
+ created by virtual register instantiation, register elimination, and
+ similar optimizations. */
+ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ XEXP (XEXP (x, 1), 0)),
+ XEXP (XEXP (x, 1), 1));
+ }
+
+ /* Canonicalize
+ (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). */
+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ rtx constant;
+ rtx other = NULL_RTX;
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ constant = XEXP (x, 1);
+ other = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ }
+ else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
+ {
+ constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ other = XEXP (x, 1);
+ }
+ else
+ constant = 0;
+
+ if (constant)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
+ XEXP (XEXP (XEXP (x, 0), 1), 0)),
+ plus_constant (other, INTVAL (constant)));
+ }
+ }
+
+ if (changed && legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+ }
+
+ if (changed
+ && GET_CODE (XEXP (x, 1)) == REG
+ && GET_CODE (XEXP (x, 0)) == REG)
+ return x;
+
+ if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
+ {
+ changed = 1;
+ x = legitimize_pic_address (x, 0);
+ }
+
+ if (changed && legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (GET_CODE (XEXP (x, 0)) == REG)
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 1), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 1) = temp;
+ return x;
+ }
+
+ else if (GET_CODE (XEXP (x, 1)) == REG)
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 0), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 0) = temp;
+ return x;
+ }
+ }
+
+ return x;
+}
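+
+/* An illustrative, hypothetical sketch of the shift-to-multiply
+   canonicalization above, kept under #if 0: shift counts 0..3 become the
+   multipliers 1, 2, 4 and 8 that the addressing modes can encode.  */
+#if 0
+#include <assert.h>
+
+/* Hypothetical helper for illustration only.  */
+static int
+shift_to_scale (int log)
+{
+  /* Only shifts by 0, 1, 2 or 3 are canonicalized, as checked above.  */
+  assert (log >= 0 && log < 4);
+  return 1 << log;
+}
+#endif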
+
+/* Print an integer constant expression in assembler syntax. Addition
+ and subtraction are the only arithmetic that may appear in these
+ expressions. FILE is the stdio stream to write to, X is the rtx, and
+ CODE is the operand print code from the output string. */
+
+static void
+output_pic_addr_const (FILE *file, rtx x, int code)
+{
+ char buf[256];
+
+ switch (GET_CODE (x))
+ {
+ case PC:
+ gcc_assert (flag_pic);
+ putc ('.', file);
+ break;
+
+ case SYMBOL_REF:
+ /* APPLE LOCAL begin axe stubs 5571540 */
+ if (! TARGET_MACHO ||
+#if TARGET_MACHO
+ ! darwin_stubs ||
+#endif
+ TARGET_64BIT)
+ /* APPLE LOCAL end axe stubs 5571540 */
+ output_addr_const (file, x);
+ else
+ {
+ const char *name = XSTR (x, 0);
+
+ /* Mark the decl as referenced so that cgraph will output the function. */
+ if (SYMBOL_REF_DECL (x))
+ mark_decl_referenced (SYMBOL_REF_DECL (x));
+
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT
+ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
+ name = machopic_indirection_name (x, /*stub_p=*/true);
+#endif
+ assemble_name (file, name);
+ }
+ if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
+ fputs ("@PLT", file);
+ break;
+
+ case LABEL_REF:
+ x = XEXP (x, 0);
+ /* FALLTHRU */
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+ assemble_name (asm_out_file, buf);
+ break;
+
+ case CONST_INT:
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case CONST:
+ /* This used to output parentheses around the expression,
+ but that does not work on the 386 (either ATT or BSD assembler). */
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ {
+ /* We can use %d if the number is <32 bits and positive. */
+ if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
+ fprintf (file, "0x%lx%08lx",
+ (unsigned long) CONST_DOUBLE_HIGH (x),
+ (unsigned long) CONST_DOUBLE_LOW (x));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
+ }
+ else
+ /* We can't handle floating point constants;
+ PRINT_OPERAND must handle them. */
+ output_operand_lossage ("floating constant misused");
+ break;
+
+ case PLUS:
+ /* Some assemblers need integer constants to appear first. */
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ {
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ }
+ break;
+
+ case MINUS:
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('-', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
+ break;
+
+ case UNSPEC:
+ gcc_assert (XVECLEN (x, 0) == 1);
+ output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOT:
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_GOTPCREL:
+ fputs ("@GOTPCREL(%rip)", file);
+ break;
+ case UNSPEC_GOTTPOFF:
+ /* FIXME: This might be @TPOFF in Sun ld too. */
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_NTPOFF:
+ if (TARGET_64BIT)
+ fputs ("@TPOFF", file);
+ else
+ fputs ("@NTPOFF", file);
+ break;
+ case UNSPEC_DTPOFF:
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ if (TARGET_64BIT)
+ fputs ("@GOTTPOFF(%rip)", file);
+ else
+ fputs ("@GOTNTPOFF", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ fputs ("@INDNTPOFF", file);
+ break;
+ default:
+ output_operand_lossage ("invalid UNSPEC as operand");
+ break;
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid expression as operand");
+ }
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void
+i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ fputs (ASM_LONG, file);
+ output_addr_const (file, x);
+ fputs ("@DTPOFF", file);
+ switch (size)
+ {
+ case 4:
+ break;
+ case 8:
+ fputs (", 0", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize PIC+GOTOFF and turn it back
+ into a direct symbol reference.
+
+ On Darwin, this is necessary to avoid a crash, because Darwin
+ has a different PIC label for each routine but the DWARF debugging
+ information is not associated with any particular routine, so it's
+ necessary to remove references to the PIC label from RTL stored by
+ the DWARF output code. */
+
+static rtx
+ix86_delegitimize_address (rtx orig_x)
+{
+ rtx x = orig_x;
+ /* reg_addend is NULL or a multiple of some register. */
+ rtx reg_addend = NULL_RTX;
+ /* const_addend is NULL or a const_int. */
+ rtx const_addend = NULL_RTX;
+ /* This is the result, or NULL. */
+ rtx result = NULL_RTX;
+
+ if (GET_CODE (x) == MEM)
+ x = XEXP (x, 0);
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST
+ || GET_CODE (XEXP (x, 0)) != UNSPEC
+ || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
+ || GET_CODE (orig_x) != MEM)
+ return orig_x;
+ return XVECEXP (XEXP (x, 0), 0, 0);
+ }
+
+ if (GET_CODE (x) != PLUS
+ || GET_CODE (XEXP (x, 1)) != CONST)
+ return orig_x;
+
+ if (GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ /* %ebx + GOT/GOTOFF */
+ ;
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ /* %ebx + %reg * scale + GOT/GOTOFF */
+ reg_addend = XEXP (x, 0);
+ if (GET_CODE (XEXP (reg_addend, 0)) == REG
+ && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ reg_addend = XEXP (reg_addend, 1);
+ else if (GET_CODE (XEXP (reg_addend, 1)) == REG
+ && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
+ reg_addend = XEXP (reg_addend, 0);
+ else
+ return orig_x;
+ if (GET_CODE (reg_addend) != REG
+ && GET_CODE (reg_addend) != MULT
+ && GET_CODE (reg_addend) != ASHIFT)
+ return orig_x;
+ }
+ else
+ return orig_x;
+
+ x = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ const_addend = XEXP (x, 1);
+ x = XEXP (x, 0);
+ }
+
+ if (GET_CODE (x) == UNSPEC
+ && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
+ || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
+ result = XVECEXP (x, 0, 0);
+
+ if (TARGET_MACHO && darwin_local_data_pic (x)
+ && GET_CODE (orig_x) != MEM)
+ result = XEXP (x, 0);
+
+ if (! result)
+ return orig_x;
+
+ if (const_addend)
+ result = gen_rtx_PLUS (Pmode, result, const_addend);
+ if (reg_addend)
+ result = gen_rtx_PLUS (Pmode, reg_addend, result);
+ return result;
+}
+
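+/* Print to FILE the condition-code suffix ("e", "ne", "g", ...) for
+ comparison CODE in flags mode MODE.  If REVERSE is nonzero, print the
+ suffix for the reversed condition.  FP is nonzero when the suffix is
+ for an fcmov-style instruction, which spells some conditions
+ differently ("nbe", "nb", "u", "nu").  */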
+static void
+put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
+ int fp, FILE *file)
+{
+ const char *suffix;
+
+ if (mode == CCFPmode || mode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
+ code = ix86_fp_compare_code_to_integer (code);
+ mode = CCmode;
+ }
+ if (reverse)
+ code = reverse_condition (code);
+
+ switch (code)
+ {
+ case EQ:
+ suffix = "e";
+ break;
+ case NE:
+ suffix = "ne";
+ break;
+ case GT:
+ gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
+ suffix = "g";
+ break;
+ case GTU:
+ /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
+ Those same assemblers have the same but opposite lossage on cmov. */
+ gcc_assert (mode == CCmode);
+ suffix = fp ? "nbe" : "a";
+ break;
+ case LT:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "s";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "l";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case LTU:
+ gcc_assert (mode == CCmode);
+ suffix = "b";
+ break;
+ case GE:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "ns";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "ge";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case GEU:
+ /* ??? As above. */
+ gcc_assert (mode == CCmode);
+ suffix = fp ? "nb" : "ae";
+ break;
+ case LE:
+ gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
+ suffix = "le";
+ break;
+ case LEU:
+ gcc_assert (mode == CCmode);
+ suffix = "be";
+ break;
+ case UNORDERED:
+ suffix = fp ? "u" : "p";
+ break;
+ case ORDERED:
+ suffix = fp ? "nu" : "np";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ fputs (suffix, file);
+}
+
+/* Print the name of register X to FILE based on its machine mode and number.
+ If CODE is 'w', pretend the mode is HImode.
+ If CODE is 'b', pretend the mode is QImode.
+ If CODE is 'k', pretend the mode is SImode.
+ If CODE is 'q', pretend the mode is DImode.
+ If CODE is 'h', pretend the reg is the 'high' byte register.
+ If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack reg. */
+
+void
+print_reg (rtx x, int code, FILE *file)
+{
+ gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
+ && REGNO (x) != FRAME_POINTER_REGNUM
+ && REGNO (x) != FLAGS_REG
+ && REGNO (x) != FPSR_REG);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
+ putc ('%', file);
+
+ if (code == 'w' || MMX_REG_P (x))
+ code = 2;
+ else if (code == 'b')
+ code = 1;
+ else if (code == 'k')
+ code = 4;
+ else if (code == 'q')
+ code = 8;
+ else if (code == 'y')
+ code = 3;
+ else if (code == 'h')
+ code = 0;
+ else
+ code = GET_MODE_SIZE (GET_MODE (x));
+
+ /* Irritatingly, AMD extended registers use a different naming convention
+ from the normal registers. */
+ if (REX_INT_REG_P (x))
+ {
+ gcc_assert (TARGET_64BIT);
+ switch (code)
+ {
+ case 0:
+ error ("extended registers have no high halves");
+ break;
+ case 1:
+ fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 2:
+ fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 4:
+ fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 8:
+ fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ default:
+ error ("unsupported operand size for extended register");
+ break;
+ }
+ return;
+ }
+ switch (code)
+ {
+ case 3:
+ if (STACK_TOP_P (x))
+ {
+ fputs ("st(0)", file);
+ break;
+ }
+ /* FALLTHRU */
+ case 8:
+ case 4:
+ case 12:
+ if (! ANY_FP_REG_P (x))
+ putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
+ /* FALLTHRU */
+ case 16:
+ case 2:
+ normal:
+ fputs (hi_reg_name[REGNO (x)], file);
+ break;
+ case 1:
+ if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
+ goto normal;
+ fputs (qi_reg_name[REGNO (x)], file);
+ break;
+ case 0:
+ if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
+ goto normal;
+ fputs (qi_high_reg_name[REGNO (x)], file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in some tls_local_dynamic_base
+ pattern. */
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
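+/* for_each_rtx callback for get_some_local_dynamic_name: if *PX is a
+ local-dynamic TLS symbol, record its name in cfun->machine and stop
+ the traversal.  */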
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Meaning of CODE:
+ L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
+ C -- print opcode suffix for set/cmov insn.
+ c -- like C, but print reversed condition
+ F,f -- likewise, but for floating-point.
+ O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+ otherwise nothing
+ R -- print the prefix for register names.
+ z -- print the opcode suffix for the size of the current operand.
+ * -- print a star (in certain assembler syntax)
+ A -- print an absolute memory reference.
+ w -- print the operand as if it's a "word" (HImode) even if it isn't.
+ s -- print a shift double count, followed by the assembler's argument
+ delimiter.
+ b -- print the QImode name of the register for the indicated operand.
+ %b0 would print %al if operands[0] is reg 0.
+ w -- likewise, print the HImode name of the register.
+ k -- likewise, print the SImode name of the register.
+ q -- likewise, print the DImode name of the register.
+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+ y -- print "st(0)" instead of "st" as a register.
+ D -- print condition for SSE cmp instruction.
+ P -- if PIC, print an @PLT suffix.
+ X -- don't print any sort of PIC '@' suffix for a symbol.
+ & -- print some in-use local-dynamic symbol name.
+ H -- print a memory address offset by 8; used for sse high-parts.
+ */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ if (code)
+ {
+ switch (code)
+ {
+ case '*':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('*', file);
+ return;
+
+ case '&':
+ assemble_name (file, get_some_local_dynamic_name ());
+ return;
+
+ case 'A':
+ switch (ASSEMBLER_DIALECT)
+ {
+ case ASM_ATT:
+ putc ('*', file);
+ break;
+
+ case ASM_INTEL:
+ /* Intel syntax. For absolute addresses, registers should not
+ be surrounded by braces. */
+ if (GET_CODE (x) != REG)
+ {
+ putc ('[', file);
+ PRINT_OPERAND (file, x, 0);
+ putc (']', file);
+ return;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ PRINT_OPERAND (file, x, 0);
+ return;
+
+
+ case 'L':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'W':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('w', file);
+ return;
+
+ case 'B':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('b', file);
+ return;
+
+ case 'Q':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'S':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('s', file);
+ return;
+
+ case 'T':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('t', file);
+ return;
+
+ case 'z':
+ /* 387 opcodes don't get size suffixes if the operands are
+ registers. */
+ if (STACK_REG_P (x))
+ return;
+
+ /* Likewise if using Intel opcodes. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return;
+
+ /* This is the size of op from size of operand. */
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 2:
+#ifdef HAVE_GAS_FILDS_FISTS
+ putc ('s', file);
+#endif
+ return;
+
+ case 4:
+ if (GET_MODE (x) == SFmode)
+ {
+ putc ('s', file);
+ return;
+ }
+ else
+ putc ('l', file);
+ return;
+
+ case 12:
+ case 16:
+ putc ('t', file);
+ return;
+
+ case 8:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+#ifdef GAS_MNEMONICS
+ putc ('q', file);
+#else
+ putc ('l', file);
+ putc ('l', file);
+#endif
+ }
+ else
+ putc ('l', file);
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ case 'b':
+ case 'w':
+ case 'k':
+ case 'q':
+ case 'h':
+ case 'y':
+ case 'X':
+ case 'P':
+ break;
+
+ case 's':
+ if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
+ {
+ PRINT_OPERAND (file, x, 0);
+ putc (',', file);
+ }
+ return;
+
+ case 'D':
+ /* A little bit of braindamage here. The SSE compare instructions
+ use completely different names for the comparisons than the
+ fp conditional moves do. */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ case UNEQ:
+ fputs ("eq", file);
+ break;
+ case LT:
+ case UNLT:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case UNLE:
+ fputs ("le", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ case LTGT:
+ fputs ("neq", file);
+ break;
+ case UNGE:
+ case GE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ case GT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ case 'O':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ switch (GET_MODE (x))
+ {
+ case HImode: putc ('w', file); break;
+ case SImode:
+ case SFmode: putc ('l', file); break;
+ case DImode:
+ case DFmode: putc ('q', file); break;
+ default: gcc_unreachable ();
+ }
+ putc ('.', file);
+ }
+#endif
+ return;
+ case 'C':
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
+ return;
+ case 'F':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
+ return;
+
+ /* Like above, but reverse condition */
+ case 'c':
+ /* Check to see if argument to %c is really a constant
+ and not a condition code which needs to be reversed. */
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
+ return;
+ }
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
+ return;
+ case 'f':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
+ return;
+
+ case 'H':
+ /* It doesn't actually matter what mode we use here, as we're
+ only going to use this for printing. */
+ x = adjust_address_nv (x, DImode, 8);
+ break;
+
+ case '+':
+ {
+ rtx x;
+
+ if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
+ return;
+
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ if (pred_val < REG_BR_PROB_BASE * 45 / 100
+ || pred_val > REG_BR_PROB_BASE * 55 / 100)
+ {
+ int taken = pred_val > REG_BR_PROB_BASE / 2;
+ int cputaken = final_forward_branch_p (current_output_insn) == 0;
+
+ /* Emit hints only in the case default branch prediction
+ heuristics would fail. */
+ if (taken != cputaken)
+ {
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken)
+ fputs ("ds ; ", file);
+ else
+ fputs ("cs ; ", file);
+ }
+ }
+ }
+ return;
+ }
+ default:
+ output_operand_lossage ("invalid operand code '%c'", code);
+ }
+ }
+
+ if (GET_CODE (x) == REG)
+ print_reg (x, code, file);
+
+ else if (GET_CODE (x) == MEM)
+ {
+ /* No `byte ptr' prefix for call instructions. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
+ {
+ const char * size;
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1: size = "BYTE"; break;
+ case 2: size = "WORD"; break;
+ case 4: size = "DWORD"; break;
+ case 8: size = "QWORD"; break;
+ case 12: size = "XWORD"; break;
+ case 16: size = "XMMWORD"; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Check for explicit size override (codes 'b', 'w' and 'k') */
+ if (code == 'b')
+ size = "BYTE";
+ else if (code == 'w')
+ size = "WORD";
+ else if (code == 'k')
+ size = "DWORD";
+
+ fputs (size, file);
+ fputs (" PTR ", file);
+ }
+
+ x = XEXP (x, 0);
+ /* Avoid (%rip) for call operands. */
+ if (CONSTANT_ADDRESS_P (x) && code == 'P'
+ && GET_CODE (x) != CONST_INT)
+ output_addr_const (file, x);
+ else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
+ output_operand_lossage ("invalid constraints for operand");
+ else
+ output_address (x);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ fprintf (file, "0x%08lx", l);
+ }
+
+ /* These float cases don't actually occur as immediate operands. */
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fprintf (file, "%s", dstr);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE
+ && GET_MODE (x) == XFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fprintf (file, "%s", dstr);
+ }
+
+ else
+ {
+ /* We have patterns that allow zero sets of memory, for instance.
+ In 64-bit mode, we should probably support all 8-byte vectors,
+ since we can in fact encode that into an immediate. */
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+ gcc_assert (x == CONST0_RTX (GET_MODE (x)));
+ x = const0_rtx;
+ }
+
+ if (code != 'P')
+ {
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ }
+ else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == LABEL_REF)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ else
+ fputs ("OFFSET FLAT:", file);
+ }
+ }
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ /* APPLE LOCAL begin dynamic-no-pic */
+ else if (flag_pic || (TARGET_MACHO && MACHOPIC_INDIRECT))
+ /* APPLE LOCAL end dynamic-no-pic */
+ output_pic_addr_const (file, x, code);
+ else
+ output_addr_const (file, x);
+ }
+}
+
+/* Print a memory operand whose address is ADDR. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int scale;
+ int ok = ix86_decompose_address (addr, &parts);
+
+ gcc_assert (ok);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ switch (parts.seg)
+ {
+ case SEG_DEFAULT:
+ break;
+ case SEG_FS:
+ case SEG_GS:
+ if (USER_LABEL_PREFIX[0] == 0)
+ putc ('%', file);
+ fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!base && !index)
+ {
+ /* A displacement-only address requires special attention. */
+
+ if (GET_CODE (disp) == CONST_INT)
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
+ {
+ if (USER_LABEL_PREFIX[0] == 0)
+ putc ('%', file);
+ fputs ("ds:", file);
+ }
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
+ }
+ else if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else
+ output_addr_const (file, disp);
+
+ /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
+ disp = XEXP (XEXP (disp, 0), 0);
+ if (GET_CODE (disp) == LABEL_REF
+ || (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == 0))
+ fputs ("(%rip)", file);
+ }
+ }
+ else
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ if (disp)
+ {
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('(', file);
+ if (base)
+ print_reg (base, 0, file);
+ if (index)
+ {
+ putc (',', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, ",%d", scale);
+ }
+ putc (')', file);
+ }
+ else
+ {
+ rtx offset = NULL_RTX;
+
+ if (disp)
+ {
+ /* Pull out the offset of a symbol; print any symbol itself. */
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
+ {
+ offset = XEXP (XEXP (disp, 0), 1);
+ disp = gen_rtx_CONST (VOIDmode,
+ XEXP (XEXP (disp, 0), 0));
+ }
+
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else if (GET_CODE (disp) == CONST_INT)
+ offset = disp;
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('[', file);
+ if (base)
+ {
+ print_reg (base, 0, file);
+ if (offset)
+ {
+ if (INTVAL (offset) >= 0)
+ putc ('+', file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ }
+ }
+ else if (offset)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ else
+ putc ('0', file);
+
+ if (index)
+ {
+ putc ('+', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, "*%d", scale);
+ }
+ putc (']', file);
+ }
+ }
+}
+
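+/* Output to FILE the operand of the UNSPEC X together with its TLS
+ relocation suffix (@GOTTPOFF, @TPOFF, @NTPOFF, ...).  Return true if X
+ was recognized, false otherwise.  */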
+bool
+output_addr_const_extra (FILE *file, rtx x)
+{
+ rtx op;
+
+ if (GET_CODE (x) != UNSPEC)
+ return false;
+
+ op = XVECEXP (x, 0, 0);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTTPOFF:
+ output_addr_const (file, op);
+ /* FIXME: This might be @TPOFF in Sun ld. */
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ output_addr_const (file, op);
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_NTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs ("@TPOFF", file);
+ else
+ fputs ("@NTPOFF", file);
+ break;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, op);
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs ("@GOTTPOFF(%rip)", file);
+ else
+ fputs ("@GOTNTPOFF", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ output_addr_const (file, op);
+ fputs ("@INDNTPOFF", file);
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/* Split one or more DImode RTL references into pairs of SImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+ /* simplify_subreg refuses to split volatile memory addresses,
+ but we still have to handle them. */
+ if (GET_CODE (op) == MEM)
+ {
+ lo_half[num] = adjust_address (op, SImode, 0);
+ hi_half[num] = adjust_address (op, SImode, 4);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 0);
+ hi_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 4);
+ }
+ }
+}
+
+/* Split one or more TImode RTL references into pairs of DImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+ /* simplify_subreg refuses to split volatile memory addresses, but we
+ still have to handle them. */
+ if (GET_CODE (op) == MEM)
+ {
+ lo_half[num] = adjust_address (op, DImode, 0);
+ hi_half[num] = adjust_address (op, DImode, 8);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
+ hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
+ }
+ }
+}
+
+/* Output code to perform a 387 binary operation in INSN, one of PLUS,
+ MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
+ is the expression of the binary operation. The output may either be
+ emitted here, or returned to the caller, like all output_* functions.
+
+ There is no guarantee that the operands are the same mode, as they
+ might be within FLOAT or FLOAT_EXTEND expressions. */
+
+#ifndef SYSV386_COMPAT
+/* Set to 1 for compatibility with brain-damaged assemblers. No-one
+ wants to fix the assemblers because that causes incompatibility
+ with gcc. No-one wants to fix gcc because that causes
+ incompatibility with assemblers... You can use the option of
+ -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
+#define SYSV386_COMPAT 1
+#endif
+
+const char *
+output_387_binary_op (rtx insn, rtx *operands)
+{
+ static char buf[30];
+ const char *p;
+ const char *ssep;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
+
+#ifdef ENABLE_CHECKING
+ /* Even if we do not want to check the inputs, this documents the input
+ constraints, which helps in understanding the following code. */
+ if (STACK_REG_P (operands[0])
+ && ((REG_P (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
+ || (REG_P (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
+ && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
+ ; /* ok */
+ else
+ gcc_assert (is_sse);
+#endif
+
+ switch (GET_CODE (operands[3]))
+ {
+ case PLUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fiadd";
+ else
+ p = "fadd";
+ ssep = "add";
+ break;
+
+ case MINUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fisub";
+ else
+ p = "fsub";
+ ssep = "sub";
+ break;
+
+ case MULT:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fimul";
+ else
+ p = "fmul";
+ ssep = "mul";
+ break;
+
+ case DIV:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fidiv";
+ else
+ p = "fdiv";
+ ssep = "div";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (is_sse)
+ {
+ strcpy (buf, ssep);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %0|%0, %2}");
+ else
+ strcat (buf, "sd\t{%2, %0|%0, %2}");
+ return buf;
+ }
+ strcpy (buf, p);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case MULT:
+ case PLUS:
+ if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
+ {
+ rtx temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+ /* We know operands[0] == operands[1]. */
+
+ if (GET_CODE (operands[2]) == MEM)
+ {
+ p = "%z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+ if (STACK_TOP_P (operands[0]))
+ /* How is it that we are storing to a dead operand[2]?
+ Well, presumably operands[1] is dead too. We can't
+ store the result to st(0) as st(0) gets popped on this
+ instruction. Instead store to operands[2] (which I
+ think has to be st(1)). st(1) will be popped later.
+ gcc <= 2.8.1 didn't have this check and generated
+ assembly code that the Unixware assembler rejected. */
+ p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+ break;
+
+ case MINUS:
+ case DIV:
+ if (GET_CODE (operands[1]) == MEM)
+ {
+ p = "r%z1\t%1";
+ break;
+ }
+
+ if (GET_CODE (operands[2]) == MEM)
+ {
+ p = "%z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+#if SYSV386_COMPAT
+ /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
+ derived assemblers, confusingly reverse the direction of
+ the operation for fsub{r} and fdiv{r} when the
+ destination register is not st(0). The Intel assembler
+ doesn't have this brain damage. Read !SYSV386_COMPAT to
+ figure out what the hardware really does. */
+ if (STACK_TOP_P (operands[0]))
+ p = "{p\t%0, %2|rp\t%2, %0}";
+ else
+ p = "{rp\t%2, %0|p\t%0, %2}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ /* As above for fmul/fadd, we can't store to st(0). */
+ p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+#endif
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+#if SYSV386_COMPAT
+ if (STACK_TOP_P (operands[0]))
+ p = "{rp\t%0, %1|p\t%1, %0}";
+ else
+ p = "{p\t%1, %0|rp\t%0, %1}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
+ else
+ p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
+#endif
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ {
+ if (STACK_TOP_P (operands[1]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
+ break;
+ }
+ else if (STACK_TOP_P (operands[1]))
+ {
+#if SYSV386_COMPAT
+ p = "{\t%1, %0|r\t%0, %1}";
+#else
+ p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
+#endif
+ }
+ else
+ {
+#if SYSV386_COMPAT
+ p = "{r\t%2, %0|\t%0, %2}";
+#else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+#endif
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ strcat (buf, p);
+ return buf;
+}
+
+/* Return needed mode for entity in optimize_mode_switching pass. */
+
+int
+ix86_mode_needed (int entity, rtx insn)
+{
+ enum attr_i387_cw mode;
+
+ /* The mode UNINITIALIZED is used to store the control word after a
+ function call or ASM pattern. The mode ANY specifies that the insn
+ places no requirements on the control word and makes no changes in
+ the bits we are interested in. */
+
+ if (CALL_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && (asm_noperands (PATTERN (insn)) >= 0
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
+ return I387_CW_UNINITIALIZED;
+
+ if (recog_memoized (insn) < 0)
+ return I387_CW_ANY;
+
+ mode = get_attr_i387_cw (insn);
+
+ switch (entity)
+ {
+ case I387_TRUNC:
+ if (mode == I387_CW_TRUNC)
+ return mode;
+ break;
+
+ case I387_FLOOR:
+ if (mode == I387_CW_FLOOR)
+ return mode;
+ break;
+
+ case I387_CEIL:
+ if (mode == I387_CW_CEIL)
+ return mode;
+ break;
+
+ case I387_MASK_PM:
+ if (mode == I387_CW_MASK_PM)
+ return mode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return I387_CW_ANY;
+}
+
+/* Output code to initialize the control word copies used by trunc?f?i and
+ rounding patterns. MODE is the i387 control-word mode to prepare; the
+ adjusted control word is stored in the stack slot associated with it. */
+
+void
+emit_i387_cw_initialization (int mode)
+{
+ rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ rtx new_mode;
+
+ int slot;
+
+ rtx reg = gen_reg_rtx (HImode);
+
+ emit_insn (gen_x86_fnstcw_1 (stored_mode));
+ emit_move_insn (reg, stored_mode);
+
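+ /* On 64-bit targets, targets with partial register stalls, or when
+ optimizing for size, adjust the control word with full HImode AND/OR
+ operations; otherwise set the rounding control by writing directly
+ into the high byte of the control word.  */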
+ if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ gcc_assert (slot < MAX_386_STACK_LOCALS);
+
+ new_mode = assign_386_stack_local (HImode, slot);
+ emit_move_insn (new_mode, reg);
+}
+
+/* Output code for INSN to convert a float to a signed int. OPERANDS
+ are the insn operands. The output may be [HSD]Imode and the input
+ operand may be [SDX]Fmode. */
+
+const char *
+output_fix_trunc (rtx insn, rtx *operands, int fisttp)
+{
+ int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+ int dimode_p = GET_MODE (operands[0]) == DImode;
+ int round_mode = get_attr_i387_cw (insn);
+
+ /* Jump through a hoop or two for DImode, since the hardware has no
+ non-popping instruction. We used to do this a different way, but
+ that was somewhat fragile and broke with post-reload splitters. */
+ if ((dimode_p || fisttp) && !stack_top_dies)
+ output_asm_insn ("fld\t%y1", operands);
+
+ gcc_assert (STACK_TOP_P (operands[1]));
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+
+ if (fisttp)
+ output_asm_insn ("fisttp%z0\t%0", operands);
+ else
+ {
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%3", operands);
+ if (stack_top_dies || dimode_p)
+ output_asm_insn ("fistp%z0\t%0", operands);
+ else
+ output_asm_insn ("fist%z0\t%0", operands);
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%2", operands);
+ }
+
+ return "";
+}
+
+/* Output code for x87 ffreep insn. The OPNO argument, which may only
+ have the values zero or one, indicates the ffreep insn's operand
+ from the OPERANDS array. */
+
+static const char *
+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
+{
+ if (TARGET_USE_FFREEP)
+#if HAVE_AS_IX86_FFREEP
+ return opno ? "ffreep\t%y1" : "ffreep\t%y0";
+#else
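+ /* The assembler does not support ffreep; emit its encoding (bytes
+ 0xdf 0xc0+i) directly as a .word.  */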
+ switch (REGNO (operands[opno]))
+ {
+ case FIRST_STACK_REG + 0: return ".word\t0xc0df";
+ case FIRST_STACK_REG + 1: return ".word\t0xc1df";
+ case FIRST_STACK_REG + 2: return ".word\t0xc2df";
+ case FIRST_STACK_REG + 3: return ".word\t0xc3df";
+ case FIRST_STACK_REG + 4: return ".word\t0xc4df";
+ case FIRST_STACK_REG + 5: return ".word\t0xc5df";
+ case FIRST_STACK_REG + 6: return ".word\t0xc6df";
+ case FIRST_STACK_REG + 7: return ".word\t0xc7df";
+ }
+#endif
+
+ return opno ? "fstp\t%y1" : "fstp\t%y0";
+}
+
+
+/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
+ should be used. UNORDERED_P is true when fucom should be used. */
+
+const char *
+output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
+{
+ int stack_top_dies;
+ rtx cmp_op0, cmp_op1;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
+
+ if (eflags_p)
+ {
+ cmp_op0 = operands[0];
+ cmp_op1 = operands[1];
+ }
+ else
+ {
+ cmp_op0 = operands[1];
+ cmp_op1 = operands[2];
+ }
+
+ if (is_sse)
+ {
+ if (GET_MODE (operands[0]) == SFmode)
+ if (unordered_p)
+ return "ucomiss\t{%1, %0|%0, %1}";
+ else
+ return "comiss\t{%1, %0|%0, %1}";
+ else
+ if (unordered_p)
+ return "ucomisd\t{%1, %0|%0, %1}";
+ else
+ return "comisd\t{%1, %0|%0, %1}";
+ }
+
+ gcc_assert (STACK_TOP_P (cmp_op0));
+
+ stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+
+ if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
+ {
+ if (stack_top_dies)
+ {
+ output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
+ return output_387_ffreep (operands, 1);
+ }
+ else
+ return "ftst\n\tfnstsw\t%0";
+ }
+
+ if (STACK_REG_P (cmp_op1)
+ && stack_top_dies
+ && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
+ && REGNO (cmp_op1) != FIRST_STACK_REG)
+ {
+ /* If both the top of the 387 stack and the other operand are
+ stack registers that die, then this must be a `fcompp'
+ float compare. */
+
+ if (eflags_p)
+ {
+ /* There is no double popping fcomi variant. Fortunately,
+ eflags is immune from the fstp's cc clobbering. */
+ if (unordered_p)
+ output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
+ else
+ output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
+ return output_387_ffreep (operands, 0);
+ }
+ else
+ {
+ if (unordered_p)
+ return "fucompp\n\tfnstsw\t%0";
+ else
+ return "fcompp\n\tfnstsw\t%0";
+ }
+ }
+ else
+ {
+ /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
+
+ static const char * const alt[16] =
+ {
+ "fcom%z2\t%y2\n\tfnstsw\t%0",
+ "fcomp%z2\t%y2\n\tfnstsw\t%0",
+ "fucom%z2\t%y2\n\tfnstsw\t%0",
+ "fucomp%z2\t%y2\n\tfnstsw\t%0",
+
+ "ficom%z2\t%y2\n\tfnstsw\t%0",
+ "ficomp%z2\t%y2\n\tfnstsw\t%0",
+ NULL,
+ NULL,
+
+ "fcomi\t{%y1, %0|%0, %y1}",
+ "fcomip\t{%y1, %0|%0, %y1}",
+ "fucomi\t{%y1, %0|%0, %y1}",
+ "fucomip\t{%y1, %0|%0, %y1}",
+
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+
+ int mask;
+ const char *ret;
+
+ mask = eflags_p << 3;
+ mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
+ mask |= unordered_p << 1;
+ mask |= stack_top_dies;
+
+ gcc_assert (mask < 16);
+ ret = alt[mask];
+ gcc_assert (ret);
+
+ return ret;
+ }
+}
+
+void
+ix86_output_addr_vec_elt (FILE *file, int value)
+{
+ const char *directive = ASM_LONG;
+
+#ifdef ASM_QUAD
+ if (TARGET_64BIT)
+ directive = ASM_QUAD;
+#else
+ gcc_assert (!TARGET_64BIT);
+#endif
+
+ fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
+}
+
+void
+ix86_output_addr_diff_elt (FILE *file, int value, int rel)
+{
+ if (TARGET_64BIT)
+ fprintf (file, "%s%s%d-%s%d\n",
+ ASM_LONG, LPREFIX, value, LPREFIX, rel);
+ else if (HAVE_AS_GOTOFF_IN_DATA)
+ fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
+#if TARGET_MACHO
+ else if (TARGET_MACHO)
+ {
+ fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
+ machopic_output_function_base_name (file);
+ fprintf(file, "\n");
+ }
+#endif
+ else
+ asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
+ ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
+}
+
+/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
+ for the target. */
+
+void
+ix86_expand_clear (rtx dest)
+{
+ rtx tmp;
+
+ /* We play register width games, which are only valid after reload. */
+ gcc_assert (reload_completed);
+
+ /* Avoid HImode and its attendant prefix byte. */
+ if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
+ dest = gen_rtx_REG (SImode, REGNO (dest));
+
+ tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
+
+ /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
+ if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
+ {
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+ }
+
+ emit_insn (tmp);
+}
+
+/* X is an unchanging MEM. If it is a constant pool reference, return
+ the constant pool rtx, else NULL. */
+
+rtx
+maybe_get_pool_constant (rtx x)
+{
+ x = ix86_delegitimize_address (XEXP (x, 0));
+
+ if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ return get_pool_constant (x);
+
+ return NULL_RTX;
+}
+
+void
+ix86_expand_move (enum machine_mode mode, rtx operands[])
+{
+ int strict = (reload_in_progress || reload_completed);
+ /* APPLE LOCAL dynamic-no-pic */
+ rtx insn, op0, op1;
+ enum tls_model model;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (GET_CODE (op1) == SYMBOL_REF)
+ {
+ model = SYMBOL_REF_TLS_MODEL (op1);
+ if (model)
+ {
+ op1 = legitimize_tls_address (op1, model, true);
+ op1 = force_operand (op1, op0);
+ if (op1 == op0)
+ return;
+ }
+ }
+ else if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
+ {
+ model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
+ if (model)
+ {
+ rtx addend = XEXP (XEXP (op1, 0), 1);
+ op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
+ op1 = force_operand (op1, NULL);
+ op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
+ op0, 1, OPTAB_DIRECT);
+ if (op1 == op0)
+ return;
+ }
+ }
+
+ /* APPLE LOCAL begin dynamic-no-pic */
+ /* allow macho & macho for x86_64 to coexist */
+ if (((TARGET_MACHO && MACHOPIC_INDIRECT)
+ || flag_pic)
+ && mode == Pmode && symbolic_operand (op1, Pmode))
+ /* APPLE LOCAL end dynamic-no-pic */
+ {
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ /* APPLE LOCAL begin dynamic-no-pic */
+ if (MACHOPIC_INDIRECT)
+ {
+ rtx temp = ((reload_in_progress
+ || ((op0 && GET_CODE (op0) == REG)
+ && mode == Pmode))
+ ? op0 : gen_reg_rtx (Pmode));
+ op1 = machopic_indirect_data_reference (op1, temp);
+ if (MACHOPIC_PURE)
+ op1 = machopic_legitimize_pic_address (op1, mode,
+ temp == op1 ? 0 : temp);
+ }
+ if (op0 != op1 && GET_CODE (op0) != MEM)
+ {
+ insn = gen_rtx_SET (VOIDmode, op0, op1);
+ emit_insn (insn);
+ return;
+ }
+ if (GET_CODE (op0) == MEM)
+ op1 = force_reg (Pmode, op1);
+ else
+ {
+ rtx temp = op0;
+ if (GET_CODE (temp) != REG)
+ temp = gen_reg_rtx (Pmode);
+ temp = legitimize_pic_address (op1, temp);
+ if (temp == op0)
+ return;
+ op1 = temp;
+ }
+ /* APPLE LOCAL end dynamic-no-pic */
+#endif
+ }
+ else
+ {
+ if (GET_CODE (op0) == MEM)
+ op1 = force_reg (Pmode, op1);
+ else
+ op1 = legitimize_address (op1, op1, Pmode);
+ }
+ }
+ else
+ {
+ if (GET_CODE (op0) == MEM
+ && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
+ || !push_operand (op0, mode))
+ && GET_CODE (op1) == MEM)
+ op1 = force_reg (mode, op1);
+
+ if (push_operand (op0, mode)
+ && ! general_no_elim_operand (op1, mode))
+ op1 = copy_to_mode_reg (mode, op1);
+
+ /* Force large constants in 64-bit compilation into a register
+ so that they get CSEed. */
+ if (TARGET_64BIT && mode == DImode
+ && immediate_operand (op1, mode)
+ && !x86_64_zext_immediate_operand (op1, VOIDmode)
+ && !register_operand (op0, mode)
+ && optimize && !reload_completed && !reload_in_progress)
+ op1 = copy_to_mode_reg (mode, op1);
+
+ if (FLOAT_MODE_P (mode))
+ {
+ /* If we are loading a floating point constant to a register,
+ force the value to memory now, since we'll get better code
+ out the back end. */
+
+ if (strict)
+ ;
+ else if (GET_CODE (op1) == CONST_DOUBLE)
+ {
+ op1 = validize_mem (force_const_mem (mode, op1));
+ if (!register_operand (op0, mode))
+ {
+ rtx temp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+ emit_move_insn (op0, temp);
+ return;
+ }
+ }
+ }
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+void
+ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
+{
+ rtx op0 = operands[0], op1 = operands[1];
+ /* APPLE LOCAL begin radar 4614623 */
+ cfun->uses_vector = 1;
+ /* APPLE LOCAL end radar 4614623 */
+
+ /* Force constants other than zero into memory. We do not know how
+ the instructions used to build constants modify the upper 64 bits
+ of the register; once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (op0, mode)
+ && CONSTANT_P (op1)
+ && standard_sse_constant_p (op1) <= 0)
+ op1 = validize_mem (force_const_mem (mode, op1));
+
+ /* Make operand1 a register if it isn't already. */
+ if (!no_new_pseudos
+ && !register_operand (op0, mode)
+ && !register_operand (op1, mode))
+ {
+ emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+/* Implement the movmisalign patterns for SSE. Non-SSE modes go
+ straight to ix86_expand_vector_move. */
+
+void
+ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
+{
+ rtx op0, op1, m;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (MEM_P (op1))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_size)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ /* ??? If we have typed data, then it would appear that using
+ movdqu is the only way to get unaligned data loaded with
+ integer type. */
+ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ rtx zero;
+
+ /* When SSE registers are split into halves, we can avoid
+ writing to the top half twice. */
+ if (TARGET_SSE_SPLIT_REGS)
+ {
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ zero = op0;
+ }
+ else
+ {
+ /* ??? Not sure about the best option for the Intel chips.
+ The following would seem to satisfy; the register is
+ entirely cleared, breaking the dependency chain. We
+ then store to the upper half, with a dependency depth
+ of one. A rumor has it that Intel recommends two movsd
+ followed by an unpacklpd, but this is unconfirmed. And
+ given that the dependency depth of the unpacklpd would
+ still be one, I'm not sure why this would be better. */
+ zero = CONST0_RTX (V2DFmode);
+ }
+
+ m = adjust_address (op1, DFmode, 0);
+ emit_insn (gen_sse2_loadlpd (op0, zero, m));
+ m = adjust_address (op1, DFmode, 8);
+ emit_insn (gen_sse2_loadhpd (op0, op0, m));
+ }
+ else
+ {
+ if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+ emit_move_insn (op0, CONST0_RTX (mode));
+ else
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+
+ if (mode != V4SFmode)
+ op0 = gen_lowpart (V4SFmode, op0);
+ m = adjust_address (op1, V2SFmode, 0);
+ emit_insn (gen_sse_loadlps (op0, op0, m));
+ m = adjust_address (op1, V2SFmode, 8);
+ emit_insn (gen_sse_loadhps (op0, op0, m));
+ }
+ }
+ else if (MEM_P (op0))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_size)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ /* ??? Similar to above, only less clear because of quote
+ typeless stores unquote. */
+ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ m = adjust_address (op0, DFmode, 0);
+ emit_insn (gen_sse2_storelpd (m, op1));
+ m = adjust_address (op0, DFmode, 8);
+ emit_insn (gen_sse2_storehpd (m, op1));
+ }
+ else
+ {
+ if (mode != V4SFmode)
+ op1 = gen_lowpart (V4SFmode, op1);
+ m = adjust_address (op0, V2SFmode, 0);
+ emit_insn (gen_sse_storelps (m, op1));
+ m = adjust_address (op0, V2SFmode, 8);
+ emit_insn (gen_sse_storehps (m, op1));
+ }
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Expand a push in MODE. This is some mode for which we do not support
+ proper push instructions, at least from the registers that we expect
+ the value to live in. */
+
+void
+ix86_expand_push (enum machine_mode mode, rtx x)
+{
+ rtx tmp;
+
+ tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
+ GEN_INT (-GET_MODE_SIZE (mode)),
+ stack_pointer_rtx, 1, OPTAB_DIRECT);
+ if (tmp != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, tmp);
+
+ tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
+ emit_move_insn (tmp, x);
+}
+
+/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
+ destination to use for the operation. If different from the true
+ destination in operands[0], a copy operation will be required. */
+
+rtx
+ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src1, src2, dst;
+
+ dst = operands[0];
+ src1 = operands[1];
+ src2 = operands[2];
+
+ /* Recognize <var1> = <value> <op> <var1> for commutative operators */
+ if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ && (rtx_equal_p (dst, src2)
+ || immediate_operand (src1, mode)))
+ {
+ rtx temp = src1;
+ src1 = src2;
+ src2 = temp;
+ }
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (GET_CODE (dst) == MEM)
+ {
+ if (rtx_equal_p (dst, src1))
+ matching_memory = 1;
+ else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ && rtx_equal_p (dst, src2))
+ matching_memory = 2;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+ /* Both source operands cannot be in memory. */
+ if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
+ {
+ if (matching_memory != 2)
+ src2 = force_reg (mode, src2);
+ else
+ src1 = force_reg (mode, src1);
+ }
+
+ /* If the operation is not commutable, source 1 cannot be a constant
+ or non-matching memory. */
+ if ((CONSTANT_P (src1)
+ || (!matching_memory && GET_CODE (src1) == MEM))
+ && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+ src1 = force_reg (mode, src1);
+
+ src1 = operands[1] = src1;
+ src2 = operands[2] = src2;
+ return dst;
+}
+
+/* Similarly, but assume that the destination has already been
+ set up properly. */
+
+void
+ix86_fixup_binary_operands_no_copy (enum rtx_code code,
+ enum machine_mode mode, rtx operands[])
+{
+ rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+ gcc_assert (dst == operands[0]);
+}
+
+/* Attempt to expand a binary operator. Make the expansion closer to the
+ actual machine than just general_operand, which would allow 3 separate
+ memory references (one output, two input) in a single insn. */
+
+void
+ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx src1, src2, dst, op, clob;
+
+ dst = ix86_fixup_binary_operands (code, mode, operands);
+ src1 = operands[1];
+ src2 = operands[2];
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
+ if (reload_in_progress)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. We can only do this with PLUS. */
+ gcc_assert (code == PLUS);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the binary operator meets the
+ appropriate constraints. */
+
+int
+ix86_binary_operator_ok (enum rtx_code code,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx operands[3])
+{
+ /* Both source operands cannot be in memory. */
+ if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
+ return 0;
+ /* If the operation is not commutable, source 1 cannot be a constant. */
+ if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+ return 0;
+ /* If the destination is memory, we must have a matching source operand. */
+ if (GET_CODE (operands[0]) == MEM
+ && ! (rtx_equal_p (operands[0], operands[1])
+ || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ && rtx_equal_p (operands[0], operands[2]))))
+ return 0;
+ /* If the operation is not commutable and source 1 is memory, we must
+ have a matching destination. */
+ if (GET_CODE (operands[1]) == MEM
+ && GET_RTX_CLASS (code) != RTX_COMM_ARITH
+ && ! rtx_equal_p (operands[0], operands[1]))
+ return 0;
+ return 1;
+}
+
+/* Attempt to expand a unary operator. Make the expansion closer to the
+ actual machine than just general_operand, which would allow 2 separate
+ memory references (one output, one input) in a single insn. */
+
+void
+ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src, dst, op, clob;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (MEM_P (dst))
+ {
+ if (rtx_equal_p (dst, src))
+ matching_memory = 1;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+ /* When source operand is memory, destination must match. */
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
+ if (reload_in_progress || code == NOT)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. */
+ gcc_assert (code == NOT);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the unary operator meets the
+ appropriate constraints. */
+
+int
+ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx operands[2] ATTRIBUTE_UNUSED)
+{
+ /* If one of operands is memory, source and destination must match. */
+ if ((GET_CODE (operands[0]) == MEM
+ || GET_CODE (operands[1]) == MEM)
+ && ! rtx_equal_p (operands[0], operands[1]))
+ return FALSE;
+ return TRUE;
+}
+
+/* APPLE LOCAL begin 4176531 4424891 */
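+/* Helper for the expanders below: move OP1 into OP0 in vector mode MODE
+ via ix86_expand_vector_move.  */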
+static void
+ix86_expand_vector_move2 (enum machine_mode mode, rtx op0, rtx op1)
+{
+ rtx operands[2];
+ operands[0] = op0;
+ operands[1] = op1;
+ ix86_expand_vector_move (mode, operands);
+}
+
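+/* Return an rtvec of SCALARS_PER_VECTOR (2 or 4) elements whose first
+ element is VAL and whose remaining elements are zero constants of
+ MODE.  */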
+static rtvec
+gen_2_4_rtvec (int scalars_per_vector, rtx val, enum machine_mode mode)
+{
+ rtvec rval;
+ switch (scalars_per_vector)
+ {
+ case 2: rval = gen_rtvec (2, val, CONST0_RTX (mode));
+ break;
+ case 4: rval = gen_rtvec (4, val, CONST0_RTX (mode),
+ CONST0_RTX (mode), CONST0_RTX (mode));
+ break;
+ default: abort ();
+ }
+ return rval;
+}
+
+/* Convert a DFmode value in an SSE register into an unsigned SImode.
+ When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
+ conversion, and ignoring the upper 32 bits of the result. On
+ x86_64, there is an equivalent SSE %xmm->signed-int-64 conversion.
+ On x86_32, we don't have the instruction, nor the 64-bit
+ destination register it requires. Do the conversion inline in the
+ SSE registers. Requires SSE2. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_uns_DF2SI_sse (rtx operands[])
+{
+ rtx int_zero_as_fp, int_maxval_as_fp, int_two31_as_fp;
+ REAL_VALUE_TYPE rvt_zero, rvt_int_maxval, rvt_int_two31;
+ rtx int_zero_as_xmm, int_maxval_as_xmm;
+ rtx fp_value = operands[1];
+ rtx target = operands[0];
+ rtx large_xmm;
+ rtx large_xmm_v2di;
+ rtx le_op;
+ rtx zero_or_two31_xmm;
+ rtx final_result_rtx;
+ rtx v_rtx;
+ rtx incoming_value;
+
+ cfun->uses_vector = 1;
+
+ real_from_integer (&rvt_zero, DFmode, 0ULL, 0ULL, 1);
+ int_zero_as_fp = const_double_from_real_value (rvt_zero, DFmode);
+
+ real_from_integer (&rvt_int_maxval, DFmode, 0xffffffffULL, 0ULL, 1);
+ int_maxval_as_fp = const_double_from_real_value (rvt_int_maxval, DFmode);
+
+ real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1);
+ int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode);
+
+ incoming_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+ fp_value = gen_reg_rtx (V2DFmode);
+ ix86_expand_vector_move2 (V2DFmode, fp_value,
+ gen_rtx_SUBREG (V2DFmode, incoming_value, 0));
+ large_xmm = gen_reg_rtx (V2DFmode);
+
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
+ ix86_expand_vector_move2 (DFmode, large_xmm, v_rtx);
+ le_op = gen_rtx_fmt_ee (LE, V2DFmode,
+ gen_rtx_SUBREG (V2DFmode, fp_value, 0), large_xmm);
+ /* large_xmm = (fp_value >= 2**31) ? -1 : 0 ; */
+ emit_insn (gen_sse2_vmmaskcmpv2df3 (large_xmm, large_xmm, fp_value, le_op));
+
+ int_maxval_as_xmm = gen_reg_rtx (V2DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_maxval_as_fp, DFmode));
+ ix86_expand_vector_move2 (DFmode, int_maxval_as_xmm, v_rtx);
+
+ emit_insn (gen_sse2_vmsminv2df3 (fp_value, fp_value, int_maxval_as_xmm));
+
+ int_zero_as_xmm = gen_reg_rtx (V2DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_zero_as_fp, DFmode));
+
+ ix86_expand_vector_move2 (DFmode, int_zero_as_xmm, v_rtx);
+
+ emit_insn (gen_sse2_vmsmaxv2df3 (fp_value, fp_value, int_zero_as_xmm));
+
+ zero_or_two31_xmm = gen_reg_rtx (V2DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
+ ix86_expand_vector_move2 (DFmode, zero_or_two31_xmm, v_rtx);
+
+ /* zero_or_two31 = (large_xmm) ? 2**31 : 0; */
+ emit_insn (gen_andv2df3 (zero_or_two31_xmm, zero_or_two31_xmm, large_xmm));
+ /* if (large_xmm) fp_value -= 2**31; */
+ emit_insn (gen_subv2df3 (fp_value, fp_value, zero_or_two31_xmm));
+ /* assert (0 <= fp_value && fp_value < 2**31);
+ int_result = trunc (fp_value); */
+ final_result_rtx = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_cvttpd2dq (final_result_rtx, fp_value));
+
+ large_xmm_v2di = gen_reg_rtx (V2DImode);
+ emit_move_insn (large_xmm_v2di, gen_rtx_SUBREG (V2DImode, large_xmm, 0));
+ emit_insn (gen_ashlv2di3 (large_xmm_v2di, large_xmm_v2di,
+ gen_rtx_CONST_INT (SImode, 31)));
+
+ emit_insn (gen_xorv4si3 (final_result_rtx, final_result_rtx,
+ gen_rtx_SUBREG (V4SImode, large_xmm_v2di, 0)));
+ if (!rtx_equal_p (target, final_result_rtx))
+ emit_insn (gen_sse2_stored (target, final_result_rtx));
+ return "";
+}
+
+/* Convert an SFmode value in an SSE register into an unsigned SImode.
+ When -mfpmath=387, this is done with an x87 st(0)_FP->signed-int-64
+ conversion, and subsequently ignoring the upper 32 bits of the
+ result. On x86_64, there is an equivalent SSE %xmm->signed-int-64
+ conversion. On x86_32, we don't have the instruction, nor the
+ 64-bit destination register it requires. Do the conversion inline
+ in the SSE registers. Requires SSE2. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
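+/* The strategy, step by step in the code below: clamp negative inputs
+ to 0.0; remember, as all-ones/all-zeros masks, whether the value is at
+ least 2**31 and whether it is at least 2**32; subtract 2**31 when the
+ first mask is set so that cvttps2dq sees a value in signed range;
+ convert; add the 2**31 back as an integer; and finally OR in the
+ second mask so that inputs of 2**32 or more saturate to 0xffffffff. */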
+const char *
+ix86_expand_convert_uns_SF2SI_sse (rtx operands[])
+{
+ rtx int_zero_as_fp, int_two31_as_fp, int_two32_as_fp;
+ REAL_VALUE_TYPE rvt_zero, rvt_int_two31, rvt_int_two32;
+ rtx int_zero_as_xmm;
+ rtx fp_value = operands[1];
+ rtx target = operands[0];
+ rtx large_xmm;
+ rtx two31_xmm, two32_xmm;
+ rtx above_two31_xmm, above_two32_xmm;
+ rtx zero_or_two31_SI_xmm;
+ rtx le_op;
+ rtx zero_or_two31_SF_xmm;
+ rtx int_result_xmm;
+ rtx v_rtx;
+ rtx incoming_value;
+
+ cfun->uses_vector = 1;
+
+ real_from_integer (&rvt_zero, SFmode, 0ULL, 0ULL, 1);
+ int_zero_as_fp = const_double_from_real_value (rvt_zero, SFmode);
+
+ real_from_integer (&rvt_int_two31, SFmode, 0x80000000ULL, 0ULL, 1);
+ int_two31_as_fp = const_double_from_real_value (rvt_int_two31, SFmode);
+
+ real_from_integer (&rvt_int_two32, SFmode, (HOST_WIDE_INT)0x100000000ULL,
+ 0ULL, 1);
+ int_two32_as_fp = const_double_from_real_value (rvt_int_two32, SFmode);
+
+ incoming_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+ fp_value = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (V4SFmode, fp_value,
+ gen_rtx_SUBREG (V4SFmode, incoming_value, 0));
+ large_xmm = gen_reg_rtx (V4SFmode);
+
+ /* fp_value = MAX (fp_value, 0.0); */
+ /* Preclude negative values; truncate at zero. */
+ int_zero_as_xmm = gen_reg_rtx (V4SFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_2_4_rtvec (4, int_zero_as_fp, SFmode));
+ ix86_expand_vector_move2 (SFmode, int_zero_as_xmm, v_rtx);
+ emit_insn (gen_sse_vmsmaxv4sf3 (fp_value, fp_value, int_zero_as_xmm));
+
+ /* two31_xmm = 0x80000000; */
+ two31_xmm = gen_reg_rtx (V4SFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_2_4_rtvec (4, int_two31_as_fp, SFmode));
+ ix86_expand_vector_move2 (SFmode, two31_xmm, v_rtx);
+
+ /* zero_or_two31_SF_xmm = 0x80000000; */
+ zero_or_two31_SF_xmm = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (SFmode, zero_or_two31_SF_xmm, two31_xmm);
+
+ /* above_two31_xmm = (fp_value >= 2**31) ? 0xffff_ffff : 0 ; */
+ above_two31_xmm = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (SFmode, above_two31_xmm, two31_xmm);
+ le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two31_xmm,
+ gen_rtx_SUBREG (V4SFmode, two31_xmm, 0));
+ emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two31_xmm, above_two31_xmm,
+ fp_value, le_op));
+
+ /* two32_xmm = 0x1_0000_0000; */
+ two32_xmm = gen_reg_rtx (V4SFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_2_4_rtvec (4, int_two32_as_fp, SFmode));
+ ix86_expand_vector_move2 (SFmode, two32_xmm, v_rtx);
+
+ /* above_two32_xmm = (fp_value >= 2**32) ? 0xffff_ffff : 0 ; */
+ above_two32_xmm = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (SFmode, above_two32_xmm, two32_xmm);
+ le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two32_xmm,
+ gen_rtx_SUBREG (V4SFmode, two32_xmm, 0));
+ emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two32_xmm, above_two32_xmm,
+ fp_value, le_op));
+
+ /* zero_or_two31_SF_xmm = (above_two31_xmm) ? 2**31 : 0; */
+ emit_insn (gen_andv4sf3 (zero_or_two31_SF_xmm, zero_or_two31_SF_xmm,
+ above_two31_xmm));
+
+ /* zero_or_two31_SI_xmm = (above_two31_xmm & 0x8000_0000); */
+ zero_or_two31_SI_xmm = gen_reg_rtx (V4SImode);
+ emit_move_insn (zero_or_two31_SI_xmm,
+ gen_rtx_SUBREG (V4SImode, above_two31_xmm, 0));
+ emit_insn (gen_ashlv4si3 (zero_or_two31_SI_xmm, zero_or_two31_SI_xmm,
+ gen_rtx_CONST_INT (SImode, 31)));
+
+ /* zero_or_two31_SI_xmm = (above_two31_xmm << 31); */
+ zero_or_two31_SI_xmm = gen_reg_rtx (V4SImode);
+ emit_move_insn (zero_or_two31_SI_xmm,
+ gen_rtx_SUBREG (V4SImode, above_two31_xmm, 0));
+ emit_insn (gen_ashlv4si3 (zero_or_two31_SI_xmm, zero_or_two31_SI_xmm,
+ gen_rtx_CONST_INT (SImode, 31)));
+
+ /* if (above_two31_xmm) fp_value -= 2**31; */
+ /* If the input FP value is greater than 2**31, subtract that amount
+ from the FP value before conversion. We'll re-add that amount as
+ an integer after the conversion. */
+ emit_insn (gen_subv4sf3 (fp_value, fp_value, zero_or_two31_SF_xmm));
+
+ /* assert (0.0 <= fp_value && fp_value < 2**31);
+ int_result_xmm = trunc (fp_value); */
+ /* Apply the SSE double -> signed_int32 conversion to our biased,
+ clamped SF value. */
+ int_result_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_cvttps2dq (int_result_xmm, fp_value));
+
+ /* int_result_xmm += zero_or_two_31_SI_xmm; */
+ /* Restore the 2**31 bias we may have subtracted earlier. If the
+ input FP value was between 2**31 and 2**32, this will unbias the
+ result.
+
+ input_fp_value < 2**31: this won't change the value
+ 2**31 <= input_fp_value < 2**32:
+ this will restore the 2**31 bias we subtracted earlier
+ input_fp_value >= 2**32: this insn doesn't matter;
+ the next insn will clobber this result
+ */
+ emit_insn (gen_addv4si3 (int_result_xmm, int_result_xmm,
+ zero_or_two31_SI_xmm));
+
+ /* int_result_xmm |= above_two32_xmm; */
+ /* If the input value was greater than 2**32, force the integral
+ result to 0xffff_ffff. */
+ emit_insn (gen_iorv4si3 (int_result_xmm, int_result_xmm,
+ gen_rtx_SUBREG (V4SImode, above_two32_xmm, 0)));
+
+ if (!rtx_equal_p (target, int_result_xmm))
+ emit_insn (gen_sse2_stored (target, int_result_xmm));
+ return "";
+}
+
+/* Convert an unsigned DImode value into a DFmode, using only SSE.
+ Expects the 64-bit DImode to be supplied as two 32-bit parts in two
+ SSE %xmm registers; result returned in an %xmm register. Requires
+ SSE2; will use SSE3 if available. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_uns_DI2DF_sse (rtx operands[])
+{
+ REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
+ rtx bias_lo_rtx, bias_hi_rtx;
+ rtx target = operands[0];
+ rtx fp_value = operands[1];
+ rtx fp_value_hi, fp_value_lo;
+ rtx fp_value_hi_xmm, fp_value_lo_xmm;
+ rtx int_xmm;
+ rtx final_result_xmm, result_lo_xmm;
+ rtx biases, exponents;
+ rtvec biases_rtvec, exponents_rtvec;
+
+ cfun->uses_vector = 1;
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+ int_xmm = gen_reg_rtx (V4SImode);
+
+ fp_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+ fp_value_lo = gen_rtx_SUBREG (SImode, fp_value, 0);
+ fp_value_lo_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadld (fp_value_lo_xmm, CONST0_RTX (V4SImode),
+ fp_value_lo));
+
+ fp_value_hi = gen_rtx_SUBREG (SImode, fp_value, 4);
+ fp_value_hi_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadld (fp_value_hi_xmm, CONST0_RTX (V4SImode),
+ fp_value_hi));
+
+ ix86_expand_vector_move2 (V4SImode, int_xmm, fp_value_hi_xmm);
+ emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, fp_value_lo_xmm));
+
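+ /* 0x43300000 and 0x45300000 are the high words of the doubles 2**52
+ and 2**84. Interleaving the two 32-bit halves of the input under
+ those exponents produces the doubles 2**52 + lo and 2**84 + hi*2**32;
+ subtracting the matching bias constants below recovers lo and
+ hi*2**32 exactly, and their sum is the original unsigned 64-bit
+ value. */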
+ exponents_rtvec = gen_rtvec (4, GEN_INT (0x45300000UL),
+ GEN_INT (0x43300000UL),
+ CONST0_RTX (SImode), CONST0_RTX (SImode));
+ exponents = validize_mem (
+ force_const_mem (V4SImode, gen_rtx_CONST_VECTOR (V4SImode,
+ exponents_rtvec)));
+ emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
+
+ final_result_xmm = gen_reg_rtx (V2DFmode);
+ ix86_expand_vector_move2 (V2DFmode, final_result_xmm,
+ gen_rtx_SUBREG (V2DFmode, int_xmm, 0));
+
+ /* Integral versions of the DFmode 'exponents' above. */
+ REAL_VALUE_FROM_INT (bias_lo_rvt, 0x00000000000000ULL, 0x100000ULL, DFmode);
+ REAL_VALUE_FROM_INT (bias_hi_rvt, 0x10000000000000ULL, 0x000000ULL, DFmode);
+ bias_lo_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_lo_rvt, DFmode);
+ bias_hi_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_hi_rvt, DFmode);
+ biases_rtvec = gen_rtvec (2, bias_lo_rtx, bias_hi_rtx);
+ biases = validize_mem (force_const_mem (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ biases_rtvec)));
+ emit_insn (gen_subv2df3 (final_result_xmm, final_result_xmm, biases));
+
+ if (TARGET_SSE3)
+ {
+ emit_insn (gen_sse3_haddv2df3 (final_result_xmm, final_result_xmm,
+ final_result_xmm));
+ }
+ else
+ {
+ result_lo_xmm = gen_reg_rtx (V2DFmode);
+ ix86_expand_vector_move2 (V2DFmode, result_lo_xmm, final_result_xmm);
+ emit_insn (gen_sse2_unpckhpd (final_result_xmm, final_result_xmm,
+ final_result_xmm));
+ emit_insn (gen_addv2df3 (final_result_xmm, final_result_xmm,
+ result_lo_xmm));
+ }
+
+ if (!rtx_equal_p (target, final_result_xmm))
+ emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
+
+ return "";
+}
+/* APPLE LOCAL end 4176531 4424891 */
+
+/* APPLE LOCAL begin 4424891 */
+/* Convert an unsigned SImode value into a DFmode, using only SSE.
+ Result returned in an %xmm register. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
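+/* The trick used below: adding INT_MIN flips the sign bit, mapping the
+ unsigned range [0, 2**32) onto the signed range [-2**31, 2**31);
+ cvtdq2pd then converts that signed value exactly, and adding 2**31.0
+ afterwards undoes the bias. As an illustrative example, the unsigned
+ input 3000000000 becomes the signed value 852516352, which converts
+ to 852516352.0, and adding 2147483648.0 gives 3000000000.0. */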
+const char *
+ix86_expand_convert_uns_SI2DF_sse (rtx operands[])
+{
+ REAL_VALUE_TYPE rvt_int_two31;
+ rtx int_value_reg;
+ rtx fp_value_xmm, fp_value_as_int_xmm;
+ rtx final_result_xmm;
+ rtx int_two31_as_fp, int_two31_as_fp_vec;
+ rtx v_rtx;
+ rtx target = operands[0];
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+ gcc_assert (GET_MODE (operands[1]) == SImode);
+
+ cfun->uses_vector = 1;
+
+ int_value_reg = gen_reg_rtx (SImode);
+ emit_move_insn (int_value_reg, operands[1]);
+ emit_insn (gen_addsi3 (int_value_reg, int_value_reg,
+ GEN_INT (-2147483648LL /* MIN_INT */)));
+
+ fp_value_as_int_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadld (fp_value_as_int_xmm, CONST0_RTX (V4SImode),
+ int_value_reg));
+
+ fp_value_xmm = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_sse2_cvtdq2pd (fp_value_xmm,
+ gen_rtx_SUBREG (V4SImode,
+ fp_value_as_int_xmm, 0)));
+
+ real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1);
+ int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
+
+ int_two31_as_fp_vec = validize_mem (force_const_mem (V2DFmode, v_rtx));
+
+ final_result_xmm = gen_reg_rtx (V2DFmode);
+ emit_move_insn (final_result_xmm, fp_value_xmm);
+ emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm,
+ int_two31_as_fp_vec));
+
+ if (!rtx_equal_p (target, final_result_xmm))
+ emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
+
+ return "";
+}
+
+/* Convert a signed DImode value into a DFmode, using only SSE.
+ Result returned in an %xmm register. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
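+/* The result is computed as (double) (signed) hi * 2**32
+ + (double) (unsigned) lo: the high half is converted with cvtsi2sd
+ and scaled by 2**32, the low half goes through the unsigned
+ SImode-to-DFmode helper above, and the two partial results are
+ added. */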
+const char *
+ix86_expand_convert_sign_DI2DF_sse (rtx operands[])
+{
+ rtx my_operands[2];
+ REAL_VALUE_TYPE rvt_int_two32;
+ rtx rvt_int_two32_vec;
+ rtx fp_value_hi_xmm, fp_value_hi_shifted_xmm;
+ rtx final_result_xmm;
+ rtx int_two32_as_fp, int_two32_as_fp_vec;
+ rtx target = operands[0];
+ rtx input = force_reg (DImode, operands[1]);
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+ gcc_assert (GET_MODE (input) == DImode);
+
+ cfun->uses_vector = 1;
+
+ fp_value_hi_xmm = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_sse2_cvtsi2sd (fp_value_hi_xmm, fp_value_hi_xmm,
+ gen_rtx_SUBREG (SImode, input, 4)));
+
+ real_from_integer (&rvt_int_two32, DFmode, 0x100000000ULL, 0ULL, 1);
+ int_two32_as_fp = const_double_from_real_value (rvt_int_two32, DFmode);
+ rvt_int_two32_vec = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two32_as_fp, DFmode));
+
+ int_two32_as_fp_vec = validize_mem (force_const_mem (V2DFmode,
+ rvt_int_two32_vec));
+
+ fp_value_hi_shifted_xmm = gen_reg_rtx (V2DFmode);
+ emit_move_insn (fp_value_hi_shifted_xmm, fp_value_hi_xmm);
+ emit_insn (gen_sse2_vmmulv2df3 (fp_value_hi_shifted_xmm,
+ fp_value_hi_shifted_xmm,
+ int_two32_as_fp_vec));
+
+ my_operands[0] = gen_reg_rtx (DFmode);
+ my_operands[1] = gen_rtx_SUBREG (SImode, input, 0);
+ (void) ix86_expand_convert_uns_SI2DF_sse (my_operands);
+
+ final_result_xmm = REG_P (target) && GET_MODE (target) == V2DFmode
+ ? target : gen_reg_rtx (V2DFmode);
+ emit_move_insn (final_result_xmm, gen_rtx_SUBREG (V2DFmode,
+ my_operands[0], 0));
+ emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm,
+ fp_value_hi_shifted_xmm));
+
+ if (!rtx_equal_p (target, final_result_xmm))
+ emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
+
+ return "";
+}
+/* APPLE LOCAL end 4424891 */
+
+/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
+ Create a mask for the sign bit in MODE for an SSE register. If VECT is
+ true, then replicate the mask for all elements of the vector register.
+ If INVERT is true, then create a mask excluding the sign bit. */
+
+rtx
+ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
+{
+ enum machine_mode vec_mode;
+ HOST_WIDE_INT hi, lo;
+ int shift = 63;
+ rtvec v;
+ rtx mask;
+
+ /* Find the sign bit, sign extended to 2*HWI. */
+ if (mode == SFmode)
+ lo = 0x80000000, hi = lo < 0;
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << shift, hi = -1;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+
+ if (invert)
+ lo = ~lo, hi = ~hi;
+
+ /* Force this value into the low part of a fp vector constant. */
+ mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
+ mask = gen_lowpart (mode, mask);
+
+ if (mode == SFmode)
+ {
+ if (vect)
+ v = gen_rtvec (4, mask, mask, mask, mask);
+ else
+ v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ vec_mode = V4SFmode;
+ }
+ else
+ {
+ if (vect)
+ v = gen_rtvec (2, mask, mask);
+ else
+ v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
+ vec_mode = V2DFmode;
+ }
+
+ return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
+}
+
+/* Generate code for floating point ABS or NEG. */
+
+void
+ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx mask, set, use, clob, dst, src;
+ bool matching_memory;
+ bool use_sse = false;
+ bool vector_mode = VECTOR_MODE_P (mode);
+ enum machine_mode elt_mode = mode;
+
+ if (vector_mode)
+ {
+ elt_mode = GET_MODE_INNER (mode);
+ use_sse = true;
+ }
+ else if (TARGET_SSE_MATH)
+ use_sse = SSE_FLOAT_MODE_P (mode);
+
+ /* NEG and ABS performed with SSE use bitwise mask operations.
+ Create the appropriate mask now. */
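+ /* For NEG the mask has only the sign bit set and is XORed into the
+ value; for ABS the mask is the complement of the sign bit and is
+ ANDed in. */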
+ if (use_sse)
+ mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
+ else
+ mask = NULL_RTX;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we don't have matching source
+ operands or we're using the x87, do things in registers. */
+ matching_memory = false;
+ if (MEM_P (dst))
+ {
+ if (use_sse && rtx_equal_p (dst, src))
+ matching_memory = true;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ if (vector_mode)
+ {
+ set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+ emit_insn (set);
+ }
+ else
+ {
+ set = gen_rtx_fmt_e (code, mode, src);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+ if (mask)
+ {
+ use = gen_rtx_USE (VOIDmode, mask);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, set, use, clob)));
+ }
+ else
+ emit_insn (set);
+ }
+
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Expand a copysign operation. Special case operand 0 being a constant. */
+
+void
+ix86_expand_copysign (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, op1, mask, nmask;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+
+ mode = GET_MODE (dest);
+ vmode = mode == SFmode ? V4SFmode : V2DFmode;
+
+ if (GET_CODE (op0) == CONST_DOUBLE)
+ {
+ rtvec v;
+
+ if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
+ op0 = simplify_unary_operation (ABS, mode, op0, mode);
+
+ if (op0 == CONST0_RTX (mode))
+ op0 = CONST0_RTX (vmode);
+ else
+ {
+ if (mode == SFmode)
+ v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ else
+ v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
+ op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
+ }
+
+ mask = ix86_build_signbit_mask (mode, 0, 0);
+
+ if (mode == SFmode)
+ emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
+ else
+ emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
+ }
+ else
+ {
+ nmask = ix86_build_signbit_mask (mode, 0, 1);
+ mask = ix86_build_signbit_mask (mode, 0, 0);
+
+ if (mode == SFmode)
+ emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
+ else
+ emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
+ be a constant, and so has already been expanded into a vector constant. */
+
+void
+ix86_split_copysign_const (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, op1, mask, x;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+ mask = operands[3];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ dest = simplify_gen_subreg (vmode, dest, mode, 0);
+ x = gen_rtx_AND (vmode, dest, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ if (op0 != CONST0_RTX (vmode))
+ {
+ x = gen_rtx_IOR (vmode, dest, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
+ so we have to do two masks. */
+
+void
+ix86_split_copysign_var (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, scratch, op0, op1, mask, nmask, x;
+
+ dest = operands[0];
+ scratch = operands[1];
+ op0 = operands[2];
+ op1 = operands[3];
+ nmask = operands[4];
+ mask = operands[5];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ if (rtx_equal_p (op0, op1))
+ {
+ /* Shouldn't happen often (it's useless, obviously), but when it does
+ we'd generate incorrect code if we continue below. */
+ emit_move_insn (dest, op0);
+ return;
+ }
+
+ if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
+ {
+ gcc_assert (REGNO (op1) == REGNO (scratch));
+
+ x = gen_rtx_AND (vmode, scratch, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ dest = mask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_NOT (vmode, dest);
+ x = gen_rtx_AND (vmode, x, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
+ {
+ x = gen_rtx_AND (vmode, scratch, mask);
+ }
+ else /* alternative 2,4 */
+ {
+ gcc_assert (REGNO (mask) == REGNO (scratch));
+ op1 = simplify_gen_subreg (vmode, op1, mode, 0);
+ x = gen_rtx_AND (vmode, scratch, op1);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
+ {
+ dest = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, nmask);
+ }
+ else /* alternative 3,4 */
+ {
+ gcc_assert (REGNO (nmask) == REGNO (dest));
+ dest = nmask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, op0);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+
+ x = gen_rtx_IOR (vmode, dest, scratch);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+}
+
+/* Return TRUE or FALSE depending on whether the first SET in INSN
+ has source and destination with matching CC modes, and whether the
+ CC mode is at least as constrained as REQ_MODE. */
+
+int
+ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
+{
+ rtx set;
+ enum machine_mode set_mode;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
+
+ set_mode = GET_MODE (SET_DEST (set));
+ switch (set_mode)
+ {
+ case CCNOmode:
+ if (req_mode != CCNOmode
+ && (req_mode != CCmode
+ || XEXP (SET_SRC (set), 1) != const0_rtx))
+ return 0;
+ break;
+ case CCmode:
+ if (req_mode == CCGCmode)
+ return 0;
+ /* FALLTHRU */
+ case CCGCmode:
+ if (req_mode == CCGOCmode || req_mode == CCNOmode)
+ return 0;
+ /* FALLTHRU */
+ case CCGOCmode:
+ if (req_mode == CCZmode)
+ return 0;
+ /* FALLTHRU */
+ case CCZmode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return (GET_MODE (SET_SRC (set)) == set_mode);
+}
+
+/* Generate insn patterns to do an integer compare of OPERANDS. */
+
+static rtx
+ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode cmpmode;
+ rtx tmp, flags;
+
+ cmpmode = SELECT_CC_MODE (code, op0, op1);
+ flags = gen_rtx_REG (cmpmode, FLAGS_REG);
+
+ /* This is very simple, but making the interface the same as in the
+ FP case makes the rest of the code easier. */
+ tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
+}
+
+/* Figure out whether to use ordered or unordered fp comparisons.
+ Return the appropriate mode to use. */
+
+enum machine_mode
+ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+ /* ??? In order to make all comparisons reversible, we do all comparisons
+ non-trapping when compiling for IEEE. Once gcc is able to distinguish
+ all forms of trapping and nontrapping comparisons, we can make inequality
+ comparisons trapping again, since that results in better code when using
+ FCOM based compares. */
+ return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
+}
+
+enum machine_mode
+ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
+{
+ if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
+ return ix86_fp_compare_mode (code);
+ switch (code)
+ {
+ /* Only zero flag is needed. */
+ case EQ: /* ZF=0 */
+ case NE: /* ZF!=0 */
+ return CCZmode;
+ /* Codes needing carry flag. */
+ case GEU: /* CF=0 */
+ case GTU: /* CF=0 & ZF=0 */
+ case LTU: /* CF=1 */
+ case LEU: /* CF=1 | ZF=1 */
+ return CCmode;
+ /* Codes possibly doable only with sign flag when
+ comparing against zero. */
+ case GE: /* SF=OF or SF=0 */
+ case LT: /* SF<>OF or SF=1 */
+ if (op1 == const0_rtx)
+ return CCGOCmode;
+ else
+ /* For other cases Carry flag is not required. */
+ return CCGCmode;
+ /* Codes doable only with the sign flag when comparing
+ against zero, but we lack a jump instruction for that,
+ so we need relational tests against the overflow flag,
+ which thus needs to be zero. */
+ case GT: /* ZF=0 & SF=OF */
+ case LE: /* ZF=1 | SF<>OF */
+ if (op1 == const0_rtx)
+ return CCNOmode;
+ else
+ return CCGCmode;
+ /* The strcmp pattern does (use flags), and combine may ask us for the
+ proper mode. */
+ case USE:
+ return CCmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = FLAGS_REG;
+ *p2 = FPSR_REG;
+ return true;
+}
+
+/* If two condition code modes are compatible, return a condition code
+ mode which is compatible with both. Otherwise, return
+ VOIDmode. */
+
+static enum machine_mode
+ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
+{
+ if (m1 == m2)
+ return m1;
+
+ if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
+ return VOIDmode;
+
+ if ((m1 == CCGCmode && m2 == CCGOCmode)
+ || (m1 == CCGOCmode && m2 == CCGCmode))
+ return CCGCmode;
+
+ switch (m1)
+ {
+ default:
+ gcc_unreachable ();
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCZmode:
+ switch (m2)
+ {
+ default:
+ return VOIDmode;
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCZmode:
+ return CCmode;
+ }
+
+ case CCFPmode:
+ case CCFPUmode:
+ /* These are only compatible with themselves, which we already
+ checked above. */
+ return VOIDmode;
+ }
+}
+
+/* Return true if we should use an FCOMI instruction for this fp comparison. */
+
+int
+ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+ enum rtx_code swapped_code = swap_condition (code);
+ return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
+ || (ix86_fp_comparison_cost (swapped_code)
+ == ix86_fp_comparison_fcomi_cost (swapped_code)));
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to a fp comparison. The operands are updated in place; the new
+ comparison code is returned. */
+
+static enum rtx_code
+ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
+{
+ enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
+ rtx op0 = *pop0, op1 = *pop1;
+ enum machine_mode op_mode = GET_MODE (op0);
+ int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
+
+ /* All of the unordered compare instructions only work on registers.
+ The same is true of the fcomi compare instructions. The XFmode
+ compare instructions require registers except when comparing
+ against zero or when converting operand 1 from fixed point to
+ floating point. */
+
+ if (!is_sse
+ && (fpcmp_mode == CCFPUmode
+ || (op_mode == XFmode
+ && ! (standard_80387_constant_p (op0) == 1
+ || standard_80387_constant_p (op1) == 1)
+ && GET_CODE (op1) != FLOAT)
+ || ix86_use_fcomi_compare (code)))
+ {
+ op0 = force_reg (op_mode, op0);
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ {
+ /* %%% We only allow op1 in memory; op0 must be st(0). So swap
+ things around if they appear profitable, otherwise force op0
+ into a register. */
+
+ if (standard_80387_constant_p (op0) == 0
+ || (GET_CODE (op0) == MEM
+ && ! (standard_80387_constant_p (op1) == 0
+ || GET_CODE (op1) == MEM)))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ if (GET_CODE (op0) != REG)
+ op0 = force_reg (op_mode, op0);
+
+ if (CONSTANT_P (op1))
+ {
+ int tmp = standard_80387_constant_p (op1);
+ if (tmp == 0)
+ op1 = validize_mem (force_const_mem (op_mode, op1));
+ else if (tmp == 1)
+ {
+ if (TARGET_CMOVE)
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ op1 = force_reg (op_mode, op1);
+ }
+ }
+
+ /* Try to rearrange the comparison to make it cheaper. */
+ if (ix86_fp_comparison_cost (code)
+ > ix86_fp_comparison_cost (swap_condition (code))
+ && (GET_CODE (op1) == REG || !no_new_pseudos))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ if (GET_CODE (op0) != REG)
+ op0 = force_reg (op_mode, op0);
+ }
+
+ *pop0 = op0;
+ *pop1 = op1;
+ return code;
+}
+
+/* Convert comparison codes we use to represent FP comparison to integer
+ code that will result in a proper branch. Return UNKNOWN if no such code
+ is available. */
+
+enum rtx_code
+ix86_fp_compare_code_to_integer (enum rtx_code code)
+{
+ switch (code)
+ {
+ case GT:
+ return GTU;
+ case GE:
+ return GEU;
+ case ORDERED:
+ case UNORDERED:
+ return code;
+ break;
+ case UNEQ:
+ return EQ;
+ break;
+ case UNLT:
+ return LTU;
+ break;
+ case UNLE:
+ return LEU;
+ break;
+ case LTGT:
+ return NE;
+ break;
+ default:
+ return UNKNOWN;
+ }
+}
+
+/* Split comparison code CODE into comparisons we can do using branch
+ instructions. BYPASS_CODE is the comparison code for a branch that will
+ branch around FIRST_CODE and SECOND_CODE. If one of the branches
+ is not required, its code is set to UNKNOWN.
+ We never require more than two branches. */
+
+void
+ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
+ enum rtx_code *first_code,
+ enum rtx_code *second_code)
+{
+ *first_code = code;
+ *bypass_code = UNKNOWN;
+ *second_code = UNKNOWN;
+
+ /* The fcomi comparison sets flags as follows:
+
+ cmp ZF PF CF
+ > 0 0 0
+ < 0 0 1
+ = 1 0 0
+ un 1 1 1 */
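+ /* Note that an unordered result sets ZF, PF and CF all at once, so a
+ bare LT test (CF=1) or EQ test (ZF=1) would also fire for NaNs.
+ That is why, below, LT, LE and EQ get an UNORDERED bypass branch,
+ while NE, UNGE and UNGT get a second UNORDERED test; for
+ !TARGET_IEEE_FP both extras are dropped. */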
+
+ switch (code)
+ {
+ case GT: /* GTU - CF=0 & ZF=0 */
+ case GE: /* GEU - CF=0 */
+ case ORDERED: /* PF=0 */
+ case UNORDERED: /* PF=1 */
+ case UNEQ: /* EQ - ZF=1 */
+ case UNLT: /* LTU - CF=1 */
+ case UNLE: /* LEU - CF=1 | ZF=1 */
+ case LTGT: /* EQ - ZF=0 */
+ break;
+ case LT: /* LTU - CF=1 - fails on unordered */
+ *first_code = UNLT;
+ *bypass_code = UNORDERED;
+ break;
+ case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
+ *first_code = UNLE;
+ *bypass_code = UNORDERED;
+ break;
+ case EQ: /* EQ - ZF=1 - fails on unordered */
+ *first_code = UNEQ;
+ *bypass_code = UNORDERED;
+ break;
+ case NE: /* NE - ZF=0 - fails on unordered */
+ *first_code = LTGT;
+ *second_code = UNORDERED;
+ break;
+ case UNGE: /* GEU - CF=0 - fails on unordered */
+ *first_code = GE;
+ *second_code = UNORDERED;
+ break;
+ case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
+ *first_code = GT;
+ *second_code = UNORDERED;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!TARGET_IEEE_FP)
+ {
+ *second_code = UNKNOWN;
+ *bypass_code = UNKNOWN;
+ }
+}
+
+/* Return the cost of a comparison done with fcom plus arithmetic operations
+ on AX. All of the following functions use the number of instructions as
+ the cost metric. In the future this should be tweaked to compute bytes for
+ optimize_size and to take the performance of various instructions on
+ various CPUs into account. */
+static int
+ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
+{
+ if (!TARGET_IEEE_FP)
+ return 4;
+ /* The cost of code output by ix86_expand_fp_compare. */
+ switch (code)
+ {
+ case UNLE:
+ case UNLT:
+ case LTGT:
+ case GT:
+ case GE:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ return 4;
+ break;
+ case LT:
+ case NE:
+ case EQ:
+ case UNGE:
+ return 5;
+ break;
+ case LE:
+ case UNGT:
+ return 6;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return cost of comparison done using fcomi operation.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_fcomi_cost (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ /* Return an arbitrarily high cost when the instruction is not supported;
+ this prevents gcc from using it. */
+ if (!TARGET_CMOVE)
+ return 1024;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
+}
+
+/* Return cost of comparison done using sahf operation.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_sahf_cost (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ /* Return an arbitrarily high cost when the instruction is not preferred;
+ this keeps gcc from using it. */
+ if (!TARGET_USE_SAHF && !optimize_size)
+ return 1024;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
+}
+
+/* Compute cost of the comparison done using any method.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_cost (enum rtx_code code)
+{
+ int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
+ int min;
+
+ fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
+ sahf_cost = ix86_fp_comparison_sahf_cost (code);
+
+ min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
+ if (min > sahf_cost)
+ min = sahf_cost;
+ if (min > fcomi_cost)
+ min = fcomi_cost;
+ return min;
+}
+
+/* Generate insn patterns to do a floating point compare of OPERANDS. */
+
+static rtx
+ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
+ rtx *second_test, rtx *bypass_test)
+{
+ enum machine_mode fpcmp_mode, intcmp_mode;
+ rtx tmp, tmp2;
+ int cost = ix86_fp_comparison_cost (code);
+ enum rtx_code bypass_code, first_code, second_code;
+
+ fpcmp_mode = ix86_fp_compare_mode (code);
+ code = ix86_prepare_fp_compare_args (code, &op0, &op1);
+
+ if (second_test)
+ *second_test = NULL_RTX;
+ if (bypass_test)
+ *bypass_test = NULL_RTX;
+
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+
+ /* Do fcomi/sahf based test when profitable. */
+ if ((bypass_code == UNKNOWN || bypass_test)
+ && (second_code == UNKNOWN || second_test)
+ && ix86_fp_comparison_arithmetics_cost (code) > cost)
+ {
+ if (TARGET_CMOVE)
+ {
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
+ tmp);
+ emit_insn (tmp);
+ }
+ else
+ {
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
+ emit_insn (gen_x86_sahf_1 (scratch));
+ }
+
+ /* The FP codes work out to act like unsigned. */
+ intcmp_mode = fpcmp_mode;
+ code = first_code;
+ if (bypass_code != UNKNOWN)
+ *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+ if (second_code != UNKNOWN)
+ *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+ }
+ else
+ {
+ /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
+
+ /* In the unordered case, we have to check C2 for NaN's, which
+ doesn't happen to work out to anything nice combination-wise.
+ So do some bit twiddling on the value we've got in AH to come
+ up with an appropriate set of condition codes. */
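+ /* The scratch register holds the FPU status word from fnstsw; its
+ high byte contains the condition bits: 0x01 is C0 (set for "less
+ than"), 0x04 is C2 (set for unordered), and 0x40 is C3 (set for
+ equal), so a mask such as 0x45 tests C0|C2|C3 at once. */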
+
+ intcmp_mode = CCNOmode;
+ switch (code)
+ {
+ case GT:
+ case UNGT:
+ if (code == GT || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
+ intcmp_mode = CCmode;
+ code = GEU;
+ }
+ break;
+ case LT:
+ case UNLT:
+ if (code == LT && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
+ code = NE;
+ }
+ break;
+ case GE:
+ case UNGE:
+ if (code == GE || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x01)));
+ code = NE;
+ }
+ break;
+ case LE:
+ case UNLE:
+ if (code == LE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = LTU;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = NE;
+ }
+ break;
+ case EQ:
+ case UNEQ:
+ if (code == EQ && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = NE;
+ break;
+ }
+ break;
+ case NE:
+ case LTGT:
+ if (code == NE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x40)));
+ code = NE;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = EQ;
+ }
+ break;
+
+ case UNORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = NE;
+ break;
+ case ORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+}
+
+rtx
+ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
+{
+ rtx op0, op1, ret;
+ op0 = ix86_compare_op0;
+ op1 = ix86_compare_op1;
+
+ if (second_test)
+ *second_test = NULL_RTX;
+ if (bypass_test)
+ *bypass_test = NULL_RTX;
+
+ if (ix86_compare_emitted)
+ {
+ ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
+ ix86_compare_emitted = NULL_RTX;
+ }
+ else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
+ ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ second_test, bypass_test);
+ else
+ ret = ix86_expand_int_compare (code, op0, op1);
+
+ return ret;
+}
+
+/* Return true if CODE will result in a nontrivial jump sequence. */
+bool
+ix86_fp_jump_nontrivial_p (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ if (!TARGET_CMOVE)
+ return true;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return bypass_code != UNKNOWN || second_code != UNKNOWN;
+}
+
+void
+ix86_expand_branch (enum rtx_code code, rtx label)
+{
+ rtx tmp;
+
+ /* If we have emitted a compare insn, go straight to simple.
+ ix86_expand_compare won't emit anything if ix86_compare_emitted
+ is non-NULL. */
+ if (ix86_compare_emitted)
+ goto simple;
+
+ switch (GET_MODE (ix86_compare_op0))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ simple:
+ tmp = ix86_expand_compare (code, NULL, NULL);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ return;
+
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ {
+ rtvec vec;
+ int use_fcomi;
+ enum rtx_code bypass_code, first_code, second_code;
+
+ code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
+ &ix86_compare_op1);
+
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+
+ /* Check whether we will use the natural sequence with one jump. If
+ so, we can expand the jump early. Otherwise delay expansion by
+ creating a compound insn so as not to confuse the optimizers. */
+ if (bypass_code == UNKNOWN && second_code == UNKNOWN
+ && TARGET_CMOVE)
+ {
+ ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx, NULL_RTX, NULL_RTX);
+ }
+ else
+ {
+ tmp = gen_rtx_fmt_ee (code, VOIDmode,
+ ix86_compare_op0, ix86_compare_op1);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+ use_fcomi = ix86_use_fcomi_compare (code);
+ vec = rtvec_alloc (3 + !use_fcomi);
+ RTVEC_ELT (vec, 0) = tmp;
+ RTVEC_ELT (vec, 1)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
+ RTVEC_ELT (vec, 2)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
+ if (! use_fcomi)
+ RTVEC_ELT (vec, 3)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
+
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+ }
+ return;
+ }
+
+ case DImode:
+ if (TARGET_64BIT)
+ goto simple;
+ case TImode:
+ /* Expand DImode branch into multiple compare+branch. */
+ {
+ rtx lo[2], hi[2], label2;
+ enum rtx_code code1, code2, code3;
+ enum machine_mode submode;
+
+ if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
+ {
+ tmp = ix86_compare_op0;
+ ix86_compare_op0 = ix86_compare_op1;
+ ix86_compare_op1 = tmp;
+ code = swap_condition (code);
+ }
+ if (GET_MODE (ix86_compare_op0) == DImode)
+ {
+ split_di (&ix86_compare_op0, 1, lo+0, hi+0);
+ split_di (&ix86_compare_op1, 1, lo+1, hi+1);
+ submode = SImode;
+ }
+ else
+ {
+ split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
+ split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
+ submode = DImode;
+ }
+
+ /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
+ avoid two branches. This costs one extra insn, so disable when
+ optimizing for size. */
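+ /* The combined value (hi0 ^ hi1) | (lo0 ^ lo1) is zero exactly when
+ both halves are equal, so comparing it against zero decides EQ or
+ NE with a single branch. */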
+
+ if ((code == EQ || code == NE)
+ && (!optimize_size
+ || hi[1] == const0_rtx || lo[1] == const0_rtx))
+ {
+ rtx xor0, xor1;
+
+ xor1 = hi[0];
+ if (hi[1] != const0_rtx)
+ xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ xor0 = lo[0];
+ if (lo[1] != const0_rtx)
+ xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ tmp = expand_binop (submode, ior_optab, xor1, xor0,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ ix86_compare_op0 = tmp;
+ ix86_compare_op1 = const0_rtx;
+ ix86_expand_branch (code, label);
+ return;
+ }
+
+ /* Otherwise, if we are doing a less-than or greater-than-or-equal
+ comparison, op1 is a constant, and the low word is zero, then we
+ can just examine the high word. */
+
+ if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
+ switch (code)
+ {
+ case LT: case LTU: case GE: case GEU:
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+ ix86_expand_branch (code, label);
+ return;
+ default:
+ break;
+ }
+
+ /* Otherwise, we need two or three jumps. */
+
+ label2 = gen_label_rtx ();
+
+ code1 = code;
+ code2 = swap_condition (code);
+ code3 = unsigned_condition (code);
+
+ switch (code)
+ {
+ case LT: case GT: case LTU: case GTU:
+ break;
+
+ case LE: code1 = LT; code2 = GT; break;
+ case GE: code1 = GT; code2 = LT; break;
+ case LEU: code1 = LTU; code2 = GTU; break;
+ case GEU: code1 = GTU; code2 = LTU; break;
+
+ case EQ: code1 = UNKNOWN; code2 = NE; break;
+ case NE: code2 = UNKNOWN; break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /*
+ * a < b =>
+ * if (hi(a) < hi(b)) goto true;
+ * if (hi(a) > hi(b)) goto false;
+ * if (lo(a) < lo(b)) goto true;
+ * false:
+ */
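+ /*
+ * For example, a <= b (signed) uses code1 = LT, code2 = GT and
+ * code3 = LEU:
+ * if (hi(a) < hi(b)) goto true;
+ * if (hi(a) > hi(b)) goto false;
+ * if (lo(a) <= lo(b)) goto true; -- unsigned compare
+ * false:
+ */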
+
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+
+ if (code1 != UNKNOWN)
+ ix86_expand_branch (code1, label);
+ if (code2 != UNKNOWN)
+ ix86_expand_branch (code2, label2);
+
+ ix86_compare_op0 = lo[0];
+ ix86_compare_op1 = lo[1];
+ ix86_expand_branch (code3, label);
+
+ if (code2 != UNKNOWN)
+ emit_label (label2);
+ return;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Split branch based on floating point condition. */
+void
+ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
+ rtx target1, rtx target2, rtx tmp, rtx pushed)
+{
+ rtx second, bypass;
+ rtx label = NULL_RTX;
+ rtx condition;
+ int bypass_probability = -1, second_probability = -1, probability = -1;
+ rtx i;
+
+ if (target2 != pc_rtx)
+ {
+ rtx tmp = target2;
+ code = reverse_condition_maybe_unordered (code);
+ target2 = target1;
+ target1 = tmp;
+ }
+
+ condition = ix86_expand_fp_compare (code, op1, op2,
+ tmp, &second, &bypass);
+
+ /* Remove pushed operand from stack. */
+ if (pushed)
+ ix86_free_from_memory (GET_MODE (pushed));
+
+ if (split_branch_probability >= 0)
+ {
+ /* Distribute the probabilities across the jumps.
+ Assume that BYPASS and SECOND always test
+ for UNORDERED. */
+ probability = split_branch_probability;
+
+ /* A value of 1 is low enough that the probability does not need
+ to be updated. Later we may run some experiments and see
+ whether unordered values are more frequent in practice. */
+ if (bypass)
+ bypass_probability = 1;
+ if (second)
+ second_probability = 1;
+ }
+ if (bypass != NULL_RTX)
+ {
+ label = gen_label_rtx ();
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ bypass,
+ gen_rtx_LABEL_REF (VOIDmode,
+ label),
+ pc_rtx)));
+ if (bypass_probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (bypass_probability),
+ REG_NOTES (i));
+ }
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ condition, target1, target2)));
+ if (probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (probability),
+ REG_NOTES (i));
+ if (second != NULL_RTX)
+ {
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
+ target2)));
+ if (second_probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (second_probability),
+ REG_NOTES (i));
+ }
+ if (label != NULL_RTX)
+ emit_label (label);
+}
+
+int
+ix86_expand_setcc (enum rtx_code code, rtx dest)
+{
+ rtx ret, tmp, tmpreg, equiv;
+ rtx second_test, bypass_test;
+
+ if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
+ return 0; /* FAIL */
+
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ ret = ix86_expand_compare (code, &second_test, &bypass_test);
+ PUT_MODE (ret, QImode);
+
+ tmp = dest;
+ tmpreg = dest;
+
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
+ if (bypass_test || second_test)
+ {
+ rtx test = second_test;
+ int bypass = 0;
+ rtx tmp2 = gen_reg_rtx (QImode);
+ if (bypass_test)
+ {
+ gcc_assert (!second_test);
+ test = bypass_test;
+ bypass = 1;
+ PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
+ }
+ PUT_MODE (test, QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
+
+ if (bypass)
+ emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
+ else
+ emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
+ }
+
+ /* Attach a REG_EQUAL note describing the comparison result. */
+ if (ix86_compare_op0 && ix86_compare_op1)
+ {
+ equiv = simplify_gen_relational (code, QImode,
+ GET_MODE (ix86_compare_op0),
+ ix86_compare_op0, ix86_compare_op1);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
+ }
+
+ return 1; /* DONE */
+}
+
+/* Expand a comparison that sets or clears the carry flag. Return true
+ when successful, and set *POP to the resulting comparison operation. */
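+/* The integer cases below are rewritten so that only LTU and GEU remain,
+ since those are exactly the conditions the carry flag can express:
+ a == 0 becomes (unsigned) a < 1, a > b becomes b < a or a >= b + 1,
+ a >= 0 becomes (unsigned) a < 0x80000000, and so on. */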
+static bool
+ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
+{
+ enum machine_mode mode =
+ GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
+
+ /* Do not handle DImode compares that go through a special path. Also we can't
+ deal with FP compares yet. This is possible to add. */
+ if (mode == (TARGET_64BIT ? TImode : DImode))
+ return false;
+ if (FLOAT_MODE_P (mode))
+ {
+ rtx second_test = NULL, bypass_test = NULL;
+ rtx compare_op, compare_seq;
+
+ /* Shortcut: the following common codes never translate into carry-flag compares. */
+ if (code == EQ || code == NE || code == UNEQ || code == LTGT
+ || code == ORDERED || code == UNORDERED)
+ return false;
+
+ /* These comparisons require the zero flag; swap the operands so they won't. */
+ if ((code == GT || code == UNLE || code == LE || code == UNGT)
+ && !TARGET_IEEE_FP)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ /* Try to expand the comparison and verify that we end up with a carry-flag
+ based comparison. This fails to be true only when we decide to expand the
+ comparison using arithmetic, which is not a common scenario. */
+ start_sequence ();
+ compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ if (second_test || bypass_test)
+ return false;
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
+ else
+ code = GET_CODE (compare_op);
+ if (code != LTU && code != GEU)
+ return false;
+ emit_insn (compare_seq);
+ *pop = compare_op;
+ return true;
+ }
+ if (!INTEGRAL_MODE_P (mode))
+ return false;
+ switch (code)
+ {
+ case LTU:
+ case GEU:
+ break;
+
+ /* Convert a==0 into (unsigned)a<1. */
+ case EQ:
+ case NE:
+ if (op1 != const0_rtx)
+ return false;
+ op1 = const1_rtx;
+ code = (code == EQ ? LTU : GEU);
+ break;
+
+ /* Convert a>b into b<a or a>=b+1. */
+ case GTU:
+ case LEU:
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
+ /* Bail out on overflow. We could still swap the operands, but that
+ would force loading the constant into a register. */
+ if (op1 == const0_rtx
+ || !x86_64_immediate_operand (op1, GET_MODE (op1)))
+ return false;
+ code = (code == GTU ? GEU : LTU);
+ }
+ else
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ code = (code == GTU ? LTU : GEU);
+ }
+ break;
+
+ /* Convert a>=0 into (unsigned)a<0x80000000. */
+ case LT:
+ case GE:
+ if (mode == DImode || op1 != const0_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LT ? GEU : LTU);
+ break;
+ case LE:
+ case GT:
+ if (mode == DImode || op1 != constm1_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LE ? GEU : LTU);
+ break;
+
+ default:
+ return false;
+ }
+ /* Swapping the operands may cause a constant to appear as the first operand. */
+ if (!nonimmediate_operand (op0, VOIDmode))
+ {
+ if (no_new_pseudos)
+ return false;
+ op0 = force_reg (mode, op0);
+ }
+ ix86_compare_op0 = op0;
+ ix86_compare_op1 = op1;
+ *pop = ix86_expand_compare (code, NULL, NULL);
+ gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
+ return true;
+}
+
+int
+ix86_expand_int_movcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]), compare_code;
+ rtx compare_seq, compare_op;
+ rtx second_test, bypass_test;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool sign_bit_compare_p = false;
+
+ start_sequence ();
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ compare_code = GET_CODE (compare_op);
+
+ if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
+ || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
+ sign_bit_compare_p = true;
+
+ /* Don't attempt mode expansion here -- if we had to expand 5 or 6
+ HImode insns, we'd be swallowed in word prefix ops. */
+
+ if ((mode != HImode || TARGET_FAST_PREFIX)
+ && (mode != (TARGET_64BIT ? TImode : DImode))
+ && GET_CODE (operands[2]) == CONST_INT
+ && GET_CODE (operands[3]) == CONST_INT)
+ {
+ rtx out = operands[0];
+ HOST_WIDE_INT ct = INTVAL (operands[2]);
+ HOST_WIDE_INT cf = INTVAL (operands[3]);
+ HOST_WIDE_INT diff;
+
+ diff = ct - cf;
+ /* Sign-bit compares are better done using shifts than by using
+ sbb. */
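+ /* The sbb idiom used below: after a compare, "sbb dest,dest"
+ computes dest - dest - CF, i.e. -1 when the carry flag is set and
+ 0 otherwise; that all-ones/all-zeros mask is then shaped into ct
+ and cf by the add/or/not/and sequences that follow. */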
+ if (sign_bit_compare_p
+ || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ {
+ /* Detect overlap between destination and compare sources. */
+ rtx tmp = out;
+
+ if (!sign_bit_compare_p)
+ {
+ bool fpcmp = false;
+
+ compare_code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ compare_code = ix86_fp_compare_code_to_integer (compare_code);
+ }
+
+ /* To simplify the rest of the code, restrict to the GEU case. */
+ if (compare_code == LTU)
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ else
+ {
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ diff = ct - cf;
+
+ if (reg_overlap_mentioned_p (out, ix86_compare_op0)
+ || reg_overlap_mentioned_p (out, ix86_compare_op1))
+ tmp = gen_reg_rtx (mode);
+
+ if (mode == DImode)
+ emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
+ else
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
+ }
+ else
+ {
+ if (code == GT || code == GE)
+ code = reverse_condition (code);
+ else
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ diff = ct - cf;
+ }
+ tmp = emit_store_flag (tmp, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
+
+ if (diff == 1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [addl dest, ct]
+ *
+ * Size 5 - 8.
+ */
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (cf == -1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * orl $ct, dest
+ *
+ * Size 8.
+ */
+ tmp = expand_simple_binop (mode, IOR,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (diff == -1 && ct)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * notl dest
+ * [addl dest, cf]
+ *
+ * Size 8 - 11.
+ */
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ if (cf)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (cf),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [notl dest]
+ * andl cf - ct, dest
+ * [addl dest, ct]
+ *
+ * Size 8 - 11.
+ */
+
+ if (cf == 0)
+ {
+ cf = ct;
+ ct = 0;
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ }
+
+ tmp = expand_simple_binop (mode, AND,
+ copy_rtx (tmp),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+
+ if (!rtx_equal_p (tmp, out))
+ emit_move_insn (copy_rtx (out), copy_rtx (tmp));
+
+ return 1; /* DONE */
+ }
+
+ if (diff < 0)
+ {
+ HOST_WIDE_INT tmp;
+ tmp = ct, ct = cf, cf = tmp;
+ diff = -diff;
+ if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
+ {
+ /* We may be reversing an unordered compare to a normal compare, which
+ is not valid in general (we may convert a non-trapping condition
+ to a trapping one); however, on i386 we currently emit all
+ comparisons unordered. */
+ compare_code = reverse_condition_maybe_unordered (compare_code);
+ code = reverse_condition_maybe_unordered (code);
+ }
+ else
+ {
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ }
+
+ compare_code = UNKNOWN;
+ if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
+ && GET_CODE (ix86_compare_op1) == CONST_INT)
+ {
+ if (ix86_compare_op1 == const0_rtx
+ && (code == LT || code == GE))
+ compare_code = code;
+ else if (ix86_compare_op1 == constm1_rtx)
+ {
+ if (code == LE)
+ compare_code = LT;
+ else if (code == GT)
+ compare_code = GE;
+ }
+ }
+
+ /* Optimize dest = (op0 < 0) ? -1 : cf. */
+ if (compare_code != UNKNOWN
+ && GET_MODE (ix86_compare_op0) == GET_MODE (out)
+ && (cf == -1 || ct == -1))
+ {
+ /* If the lea code below could be used, only optimize
+ if it results in a two-insn sequence. */
+
+ if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ || (compare_code == LT && ct == -1)
+ || (compare_code == GE && cf == -1))
+ {
+ /*
+ * notl op1 (if necessary)
+ * sarl $31, op1
+ * orl cf, op1
+ */
+ if (ct != -1)
+ {
+ cf = ct;
+ ct = -1;
+ code = reverse_condition (code);
+ }
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+
+ out = expand_simple_binop (mode, IOR,
+ out, GEN_INT (cf),
+ out, 1, OPTAB_DIRECT);
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+
+ return 1; /* DONE */
+ }
+ }
+
+
+ if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
+ && (mode != DImode
+ || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
+ {
+ /*
+ * xorl dest,dest
+ * cmpl op1,op2
+ * setcc dest
+ * lea cf(dest*(ct-cf)),dest
+ *
+ * Size 14.
+ *
+ * This also catches the degenerate setcc-only case.
+ */
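+ /* For example (illustrative values and register), dest = cond ? 5 : 2
+ has diff = 3: setcc leaves 0 or 1 in dest, and
+ "leal 2(%eax,%eax,2), %eax" computes dest * 3 + 2, giving 2 or 5
+ without a branch. */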
+
+ rtx tmp;
+ int nops;
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, 1);
+
+ nops = 0;
+ /* On x86_64 the lea instruction operates on Pmode, so we need
+ to do the arithmetic in the proper mode to match. */
+ if (diff == 1)
+ tmp = copy_rtx (out);
+ else
+ {
+ rtx out1;
+ out1 = copy_rtx (out);
+ tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
+ nops++;
+ if (diff & 1)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, out1);
+ nops++;
+ }
+ }
+ if (cf != 0)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
+ nops++;
+ }
+ if (!rtx_equal_p (tmp, out))
+ {
+ if (nops == 1)
+ out = force_operand (tmp, copy_rtx (out));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
+ }
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+
+ /*
+ * General case: Jumpful:
+ * xorl dest,dest cmpl op1, op2
+ * cmpl op1, op2 movl ct, dest
+ * setcc dest jcc 1f
+ * decl dest movl cf, dest
+ * andl (cf-ct),dest 1:
+ * addl ct,dest
+ *
+ * Size 20. Size 14.
+ *
+ * This is reasonably steep, but branch mispredict costs are
+ * high on modern cpus, so consider failing only if optimizing
+ * for space.
+ */
+
+ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ && BRANCH_COST >= 2)
+ {
+ if (cf == 0)
+ {
+ cf = ct;
+ ct = 0;
+ if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
+	    /* We may be reversing an unordered compare to a normal compare,
+	       which is not valid in general (we may convert a non-trapping
+	       condition to a trapping one); however, on i386 we currently
+	       emit all comparisons unordered.  */
+ code = reverse_condition_maybe_unordered (code);
+ else
+ {
+ code = reverse_condition (code);
+ if (compare_code != UNKNOWN)
+ compare_code = reverse_condition (compare_code);
+ }
+ }
+
+ if (compare_code != UNKNOWN)
+ {
+ /* notl op1 (if needed)
+ sarl $31, op1
+ andl (cf-ct), op1
+ addl ct, op1
+
+ For x < 0 (resp. x <= -1) there will be no notl,
+ so if possible swap the constants to get rid of the
+ complement.
+ True/false will be -1/0 while code below (store flag
+ followed by decrement) is 0/-1, so the constants need
+ to be exchanged once more. */
+
+ if (compare_code == GE || !cf)
+ {
+ code = reverse_condition (code);
+ compare_code = LT;
+ }
+ else
+ {
+ HOST_WIDE_INT tmp = cf;
+ cf = ct;
+ ct = tmp;
+ }
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
+ else
+ {
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, 1);
+
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ }
+
+ out = expand_simple_binop (mode, AND, copy_rtx (out),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (ct)
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+ }
+
+ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ {
+ /* Try a few things more with specific constants and a variable. */
+
+ optab op;
+ rtx var, orig_out, out, tmp;
+
+ if (BRANCH_COST <= 2)
+ return 0; /* FAIL */
+
+ /* If one of the two operands is an interesting constant, load a
+ constant with the above and mask it in with a logical operation. */
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ var = operands[3];
+ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
+ operands[3] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
+ operands[3] = const0_rtx, op = ior_optab;
+ else
+ return 0; /* FAIL */
+ }
+ else if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ var = operands[2];
+ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
+ operands[2] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
+ operands[2] = const0_rtx, op = ior_optab;
+ else
+ return 0; /* FAIL */
+ }
+ else
+ return 0; /* FAIL */
+
+ orig_out = operands[0];
+ tmp = gen_reg_rtx (mode);
+ operands[0] = tmp;
+
+ /* Recurse to get the constant loaded. */
+ if (ix86_expand_int_movcc (operands) == 0)
+ return 0; /* FAIL */
+
+ /* Mask in the interesting variable. */
+ out = expand_binop (mode, op, var, tmp, orig_out, 0,
+ OPTAB_WIDEN);
+ if (!rtx_equal_p (out, orig_out))
+ emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+
+ /*
+ * For comparison with above,
+ *
+ * movl cf,dest
+ * movl ct,tmp
+ * cmpl op1,op2
+ * cmovcc tmp,dest
+ *
+ * Size 15.
+ */
+
+ if (! nonimmediate_operand (operands[2], mode))
+ operands[2] = force_reg (mode, operands[2]);
+ if (! nonimmediate_operand (operands[3], mode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[3]);
+ operands[3] = tmp;
+ }
+ if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[2]);
+ operands[2] = tmp;
+ }
+
+ if (! register_operand (operands[2], VOIDmode)
+ && (mode == QImode
+ || ! register_operand (operands[3], VOIDmode)))
+ operands[2] = force_reg (mode, operands[2]);
+
+ if (mode == QImode
+ && ! register_operand (operands[3], VOIDmode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ emit_insn (compare_seq);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode,
+ compare_op, operands[2],
+ operands[3])));
+ if (bypass_test)
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_IF_THEN_ELSE (mode,
+ bypass_test,
+ copy_rtx (operands[3]),
+ copy_rtx (operands[0]))));
+ if (second_test)
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_IF_THEN_ELSE (mode,
+ second_test,
+ copy_rtx (operands[2]),
+ copy_rtx (operands[0]))));
+
+ return 1; /* DONE */
+}
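+
+/* A minimal C sketch of the branchless "sbb; and; add" idiom emitted
+   above for an unsigned below test, assuming 32-bit unsigned ints.
+   The hypothetical sketch_int_movcc is illustration only; the expander
+   itself works on RTL, not C.  */
+
+static unsigned int
+sketch_int_movcc (unsigned int op0, unsigned int op1,
+                  unsigned int val_below, unsigned int val_other)
+{
+  /* sbb of a register with itself yields all ones when the carry from
+     the compare is set (op0 < op1 unsigned), else zero.  */
+  unsigned int mask = op0 < op1 ? ~0U : 0U;
+  /* and with the difference of the two constants, then add the base.  */
+  return (mask & (val_below - val_other)) + val_other;
+}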
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to an sse comparison with a mask result. Thus we differ a bit from
+ ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+ The DEST operand exists to help determine whether to commute commutative
+ operators. The POP0/POP1 operands are updated in place. The new
+ comparison code is returned, or UNKNOWN if not implementable. */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+ rtx *pop0, rtx *pop1)
+{
+ rtx tmp;
+
+ switch (code)
+ {
+ case LTGT:
+ case UNEQ:
+ /* We have no LTGT as an operator. We could implement it with
+ NE & ORDERED, but this requires an extra temporary. It's
+ not clear that it's worth it. */
+ return UNKNOWN;
+
+ case LT:
+ case LE:
+ case UNGT:
+ case UNGE:
+ /* These are supported directly. */
+ break;
+
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ /* For commutative operators, try to canonicalize the destination
+ operand to be first in the comparison - this helps reload to
+ avoid extra moves. */
+ if (!dest || !rtx_equal_p (dest, *pop1))
+ break;
+ /* FALLTHRU */
+
+ case GE:
+ case GT:
+ case UNLE:
+ case UNLT:
+ /* These are not supported directly. Swap the comparison operands
+ to transform into something that is supported. */
+ tmp = *pop0;
+ *pop0 = *pop1;
+ *pop1 = tmp;
+ code = swap_condition (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+ semantics. Note that this is IEEE safe, as long as we don't
+ interchange the operands.
+
+ Returns FALSE if this conditional move doesn't match a MIN/MAX,
+ and TRUE if the operation is successful and instructions are emitted. */
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+ rtx cmp_op1, rtx if_true, rtx if_false)
+{
+ enum machine_mode mode;
+ bool is_min;
+ rtx tmp;
+
+ if (code == LT)
+ ;
+ else if (code == UNGE)
+ {
+ tmp = if_true;
+ if_true = if_false;
+ if_false = tmp;
+ }
+ else
+ return false;
+
+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+ is_min = true;
+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+ is_min = false;
+ else
+ return false;
+
+ mode = GET_MODE (dest);
+
+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+ but MODE may be a vector mode and thus not appropriate. */
+ if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+ {
+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+ rtvec v;
+
+ if_true = force_reg (mode, if_true);
+ v = gen_rtvec (2, if_true, if_false);
+ tmp = gen_rtx_UNSPEC (mode, v, u);
+ }
+ else
+ {
+ code = is_min ? SMIN : SMAX;
+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ return true;
+}
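+
+/* Why the operand order above matters: scalar SSE min/max return the
+   second source operand whenever the comparison is unordered, so
+   "a < b ? a : b" maps exactly onto the hardware min with B as the
+   second operand, while swapping the operands would change the NaN and
+   signed-zero behavior.  The hypothetical sketch_sse_min below is
+   illustration only.  */
+
+static double
+sketch_sse_min (double a, double b)
+{
+  /* Mirrors minsd semantics: when either input is a NaN, B is returned.  */
+  return a < b ? a : b;
+}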
+
+/* Expand an sse vector comparison. Return the register with the result. */
+
+static rtx
+ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+ rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx x;
+
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ if (optimize
+ || reg_overlap_mentioned_p (dest, op_true)
+ || reg_overlap_mentioned_p (dest, op_false))
+ dest = gen_reg_rtx (mode);
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ return dest;
+}
+
+/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
+ operations. This is used for both scalar and vector conditional moves. */
+
+static void
+ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx t2, t3, x;
+
+ if (op_false == CONST0_RTX (mode))
+ {
+ op_true = force_reg (mode, op_true);
+ x = gen_rtx_AND (mode, cmp, op_true);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (op_true == CONST0_RTX (mode))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, cmp);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
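+
+/* The logical blend emitted above, written as scalar bit operations:
+   every lane of CMP is either all ones or all zeros, so the select is
+   (cmp & op_true) | (~cmp & op_false), i.e. andps/andnps/orps.  The
+   hypothetical sketch_sse_movcc is illustration only.  */
+
+static unsigned int
+sketch_sse_movcc (unsigned int cmp, unsigned int op_true,
+                  unsigned int op_false)
+{
+  return (cmp & op_true) | (~cmp & op_false);
+}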
+
+/* Expand a floating-point conditional move. Return true if successful. */
+
+int
+ix86_expand_fp_movcc (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx tmp, compare_op, second_test, bypass_test;
+
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ {
+ enum machine_mode cmode;
+
+      /* Since we have no cmove for SSE registers, don't force bad register
+ allocation just to gain access to it. Deny movcc when the
+ comparison mode doesn't match the move mode. */
+ cmode = GET_MODE (ix86_compare_op0);
+ if (cmode == VOIDmode)
+ cmode = GET_MODE (ix86_compare_op1);
+ if (cmode != mode)
+ return 0;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &ix86_compare_op0,
+ &ix86_compare_op1);
+ if (code == UNKNOWN)
+ return 0;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2],
+ operands[3]))
+ return 1;
+
+ tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2], operands[3]);
+ ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
+ return 1;
+ }
+
+ /* The floating point conditional move instructions don't directly
+ support conditions resulting from a signed integer comparison. */
+
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+
+ if (!fcmov_comparison_operator (compare_op, VOIDmode))
+ {
+ gcc_assert (!second_test && !bypass_test);
+ tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (code, tmp);
+ code = NE;
+ ix86_compare_op0 = tmp;
+ ix86_compare_op1 = const0_rtx;
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+ }
+ if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[3]);
+ operands[3] = tmp;
+ }
+ if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[2]);
+ operands[2] = tmp;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, compare_op,
+ operands[2], operands[3])));
+ if (bypass_test)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, bypass_test,
+ operands[3], operands[0])));
+ if (second_test)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, second_test,
+ operands[2], operands[0])));
+
+ return 1;
+}
+
+/* Expand a floating-point vector conditional move; a vcond operation
+ rather than a movcc operation. */
+
+bool
+ix86_expand_fp_vcond (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[3]);
+ rtx cmp;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &operands[4], &operands[5]);
+ if (code == UNKNOWN)
+ return false;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
+ operands[5], operands[1], operands[2]))
+ return true;
+
+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
+ operands[1], operands[2]);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+}
+
+/* Expand a signed integral vector conditional move. */
+
+bool
+ix86_expand_int_vcond (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate = false;
+ rtx x, cop0, cop1;
+
+ cop0 = operands[4];
+ cop1 = operands[5];
+
+ /* Canonicalize the comparison to EQ, GT, GTU. */
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GTU:
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = cop0, cop0 = cop1, cop1 = x;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Only SSE4.1/SSE4.2 supports V2DImode. */
+ if (mode == V2DImode)
+ {
+ switch (code)
+ {
+ case EQ:
+ /* SSE4.1 supports EQ. */
+ if (!TARGET_SSE4_1)
+ return false;
+ break;
+
+ case GT:
+ case GTU:
+ /* SSE4.2 supports GT/GTU. */
+ if (!TARGET_SSE4_2)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ /* Unsigned parallel compare is not supported by the hardware. Play some
+ tricks to turn this into a signed comparison against 0. */
+ if (code == GTU)
+ {
+ cop0 = force_reg (mode, cop0);
+
+ switch (mode)
+ {
+ case V4SImode:
+ {
+ rtx t1, t2, mask;
+
+ /* Perform a parallel modulo subtraction. */
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_subv4si3 (t1, cop0, cop1));
+
+ /* Extract the original sign bit of op0. */
+ mask = GEN_INT (-0x80000000);
+ mask = gen_rtx_CONST_VECTOR (mode,
+ gen_rtvec (4, mask, mask, mask, mask));
+ mask = force_reg (mode, mask);
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_andv4si3 (t2, cop0, mask));
+
+ /* XOR it back into the result of the subtraction. This results
+ in the sign bit set iff we saw unsigned underflow. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_xorv4si3 (x, t1, t2));
+
+ code = GT;
+ }
+ break;
+
+ case V16QImode:
+ case V8HImode:
+ /* Perform a parallel unsigned saturating subtraction. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
+
+ code = EQ;
+ negate = !negate;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
+ }
+
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+
+ ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
+ operands[2-negate]);
+ return true;
+}
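+
+/* Sketch of the unsigned-compare trick used above for V16QImode and
+   V8HImode: the hardware only has a signed greater-than, but a >u b
+   exactly when the unsigned saturating difference a -us b is nonzero,
+   so a psubus followed by a compare against zero (with the select
+   negated) does the job.  The hypothetical sketch_gtu_via_saturation
+   is illustration only.  */
+
+static int
+sketch_gtu_via_saturation (unsigned char a, unsigned char b)
+{
+  int wide = (int) a - (int) b;
+  unsigned char diff = wide < 0 ? 0 : (unsigned char) wide;  /* psubusb */
+  return diff != 0;   /* pcmpeqb with zero, then negate the select */
+}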
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
+ true if we should do zero extension, else sign extension. HIGH_P is
+ true if we want the N/2 high elements, else the low elements. */
+
+void
+ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx, rtx);
+ rtx se, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = gen_lowpart (imode, operands[0]);
+
+ if (unsigned_p)
+ se = force_reg (imode, CONST0_RTX (imode));
+ else
+ se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
+
+ emit_insn (unpack (dest, operands[1], se));
+}
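+
+/* Sketch of the widening scheme above for a single 16-bit element: the
+   high half of each widened lane is either zero (zero extension) or a
+   mask of the sign bit obtained by comparing 0 > x (sign extension),
+   and the interleave glues the two halves together.  The hypothetical
+   sketch_sse_unpack_one is illustration only.  */
+
+static unsigned int
+sketch_sse_unpack_one (short x, int unsigned_p)
+{
+  unsigned short lo = (unsigned short) x;
+  unsigned short hi = unsigned_p ? 0 : (0 > x ? 0xffff : 0);  /* pcmpgtw */
+  return ((unsigned int) hi << 16) | lo;                      /* punpcklwd */
+}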
+
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with SSE4.1 instructions. */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx);
+ rtx src, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = operands[0];
+ if (high_p)
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ src = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (64)));
+ }
+ else
+ src = operands[1];
+
+ emit_insn (unpack (dest, src));
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* Expand conditional increment or decrement using adc/sbb instructions.
+ The default case using setcc followed by the conditional move can be
+ done by generic code. */
+int
+ix86_expand_int_addcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx compare_op;
+ rtx val = const0_rtx;
+ bool fpcmp = false;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (operands[3] != const1_rtx
+ && operands[3] != constm1_rtx)
+ return 0;
+ if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ return 0;
+ code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+
+ if (code != LTU)
+ {
+ val = constm1_rtx;
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ PUT_MODE (compare_op, mode);
+
+ /* Construct either adc or sbb insn. */
+ if ((code == LTU) == (operands[3] == constm1_rtx))
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return 1; /* DONE */
+}
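+
+/* What the adc/sbb forms above compute, in scalar terms: the compare is
+   arranged so that the carry flag holds the condition, and a single
+   adc $0 (or sbb $0) then adjusts the value by exactly one when the
+   condition holds.  A minimal sketch for the "+1" case; the hypothetical
+   sketch_int_addcc is illustration only.  */
+
+static unsigned int
+sketch_int_addcc (unsigned int a, unsigned int b, unsigned int val)
+{
+  /* The compare sets the carry when a < b (unsigned); adc adds it.  */
+  return val + (a < b ? 1 : 0);
+}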
+
+
+/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
+   works for floating point parameters and non-offsettable memories.
+   For pushes, it returns just stack offsets; the values will be saved
+   in the right order.  At most three parts are generated.  */
+
+static int
+ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
+{
+ int size;
+
+ if (!TARGET_64BIT)
+ size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
+ else
+ size = (GET_MODE_SIZE (mode) + 4) / 8;
+
+ gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
+ gcc_assert (size >= 2 && size <= 3);
+
+  /* Optimize constant pool references to immediates.  This is used by fp
+     moves, which force all constants to memory to allow combining.  */
+ if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
+ {
+ rtx tmp = maybe_get_pool_constant (operand);
+ if (tmp)
+ operand = tmp;
+ }
+
+ if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
+ {
+      /* The only non-offsettable memories we handle are pushes.  */
+ int ok = push_operand (operand, VOIDmode);
+
+ gcc_assert (ok);
+
+ operand = copy_rtx (operand);
+ PUT_MODE (operand, Pmode);
+ parts[0] = parts[1] = parts[2] = operand;
+ return size;
+ }
+
+ if (GET_CODE (operand) == CONST_VECTOR)
+ {
+ enum machine_mode imode = int_mode_for_mode (mode);
+ /* Caution: if we looked through a constant pool memory above,
+ the operand may actually have a different mode now. That's
+ ok, since we want to pun this all the way back to an integer. */
+ operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
+ gcc_assert (operand != NULL);
+ mode = imode;
+ }
+
+ if (!TARGET_64BIT)
+ {
+ if (mode == DImode)
+ split_di (&operand, 1, &parts[0], &parts[1]);
+ else
+ {
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
+ if (size == 3)
+ parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, SImode, 0);
+ parts[0] = operand;
+ parts[1] = adjust_address (operand, SImode, 4);
+ if (size == 3)
+ parts[2] = adjust_address (operand, SImode, 8);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ switch (mode)
+ {
+ case XFmode:
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
+ case DFmode:
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ parts[1] = gen_int_mode (l[1], SImode);
+ parts[0] = gen_int_mode (l[0], SImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (mode == TImode)
+ split_ti (&operand, 1, &parts[0], &parts[1]);
+ if (mode == XFmode || mode == TFmode)
+ {
+ enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, DImode, 0);
+ parts[0] = operand;
+ parts[1] = adjust_address (operand, upper_mode, 8);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ real_to_target (l, &r, mode);
+
+ /* Do not use shift by 32 to avoid warning on 32bit systems. */
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[0]
+ = gen_int_mode
+ ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
+ DImode);
+ else
+ parts[0] = immed_double_const (l[0], l[1], DImode);
+
+ if (upper_mode == SImode)
+ parts[1] = gen_int_mode (l[2], SImode);
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[1]
+ = gen_int_mode
+ ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
+ DImode);
+ else
+ parts[1] = immed_double_const (l[2], l[3], DImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+
+ return size;
+}
+
+/* Emit insns to perform a move or push of DI, DF, and XF values.
+   Operands 2-4 are filled with the destination parts in the correct
+   order; operands 5-7 are filled with the corresponding source parts.  */
+
+void
+ix86_split_long_move (rtx operands[])
+{
+ rtx part[2][3];
+ int nparts;
+ int push = 0;
+ int collisions = 0;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+  /* The DFmode expanders may ask us to move a double.
+     For a 64-bit target this is a single move; by hiding that fact
+     here we simplify the i386.md splitters.  */
+ if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
+ {
+      /* Optimize constant pool references to immediates.  This is used by
+	 fp moves, which force all constants to memory to allow combining.  */
+
+ if (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
+ operands[1] = get_pool_constant (XEXP (operands[1], 0));
+ if (push_operand (operands[0], VOIDmode))
+ {
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], Pmode);
+ }
+ else
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+
+  /* The only non-offsettable memory we handle is a push.  */
+ if (push_operand (operands[0], VOIDmode))
+ push = 1;
+ else
+ gcc_assert (GET_CODE (operands[0]) != MEM
+ || offsettable_memref_p (operands[0]));
+
+ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
+ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
+
+  /* When emitting a push, take care of source operands on the stack.  */
+ if (push && GET_CODE (operands[1]) == MEM
+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ {
+ /* APPLE LOCAL begin 4099768 */
+ if (nparts == 3 && TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
+ part[1][2] = adjust_address (part[1][2], SImode, 4);
+ /* APPLE LOCAL end 4099768 */
+ if (nparts == 3)
+ part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
+ XEXP (part[1][2], 0));
+ part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
+ XEXP (part[1][1], 0));
+ }
+
+  /* We need to do the copy in the right order in case an address register
+ of the source overlaps the destination. */
+ if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
+ {
+ if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
+ collisions++;
+ if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
+ collisions++;
+ if (nparts == 3
+ && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
+ collisions++;
+
+ /* Collision in the middle part can be handled by reordering. */
+ if (collisions == 1 && nparts == 3
+ && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
+ {
+ rtx tmp;
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+
+ /* If there are more collisions, we can't handle it by reordering.
+ Do an lea to the last part and use only one colliding move. */
+ else if (collisions > 1)
+ {
+ rtx base;
+
+ collisions = 1;
+
+ base = part[0][nparts - 1];
+
+ /* Handle the case when the last part isn't valid for lea.
+ Happens in 64-bit mode storing the 12-byte XFmode. */
+ if (GET_MODE (base) != Pmode)
+ base = gen_rtx_REG (Pmode, REGNO (base));
+
+ emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+ part[1][0] = replace_equiv_address (part[1][0], base);
+ part[1][1] = replace_equiv_address (part[1][1],
+ plus_constant (base, UNITS_PER_WORD));
+ if (nparts == 3)
+ part[1][2] = replace_equiv_address (part[1][2],
+ plus_constant (base, 8));
+ }
+ }
+
+ if (push)
+ {
+ if (!TARGET_64BIT)
+ {
+ if (nparts == 3)
+ {
+ if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
+ emit_move_insn (part[0][2], part[1][2]);
+ }
+ }
+ else
+ {
+      /* In 64-bit mode we don't have a 32-bit push available.  If this is
+	 a register, that is OK - we just use the larger counterpart.  We
+	 also retype the memory - this comes from an attempt to avoid a REX
+	 prefix when moving the second half of a TFmode value.  */
+ if (GET_MODE (part[1][1]) == SImode)
+ {
+ switch (GET_CODE (part[1][1]))
+ {
+ case MEM:
+ part[1][1] = adjust_address (part[1][1], DImode, 0);
+ break;
+
+ case REG:
+ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (GET_MODE (part[1][0]) == SImode)
+ part[1][0] = part[1][1];
+ }
+ }
+ emit_move_insn (part[0][1], part[1][1]);
+ emit_move_insn (part[0][0], part[1][0]);
+ return;
+ }
+
+ /* Choose correct order to not overwrite the source before it is copied. */
+ if ((REG_P (part[0][0])
+ && REG_P (part[1][1])
+ && (REGNO (part[0][0]) == REGNO (part[1][1])
+ || (nparts == 3
+ && REGNO (part[0][0]) == REGNO (part[1][2]))))
+ || (collisions > 0
+ && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
+ {
+ if (nparts == 3)
+ {
+ operands[2] = part[0][2];
+ operands[3] = part[0][1];
+ operands[4] = part[0][0];
+ operands[5] = part[1][2];
+ operands[6] = part[1][1];
+ operands[7] = part[1][0];
+ }
+ else
+ {
+ operands[2] = part[0][1];
+ operands[3] = part[0][0];
+ operands[5] = part[1][1];
+ operands[6] = part[1][0];
+ }
+ }
+ else
+ {
+ if (nparts == 3)
+ {
+ operands[2] = part[0][0];
+ operands[3] = part[0][1];
+ operands[4] = part[0][2];
+ operands[5] = part[1][0];
+ operands[6] = part[1][1];
+ operands[7] = part[1][2];
+ }
+ else
+ {
+ operands[2] = part[0][0];
+ operands[3] = part[0][1];
+ operands[5] = part[1][0];
+ operands[6] = part[1][1];
+ }
+ }
+
+ /* If optimizing for size, attempt to locally unCSE nonzero constants. */
+ if (optimize_size)
+ {
+ if (GET_CODE (operands[5]) == CONST_INT
+ && operands[5] != const0_rtx
+ && REG_P (operands[2]))
+ {
+ if (GET_CODE (operands[6]) == CONST_INT
+ && INTVAL (operands[6]) == INTVAL (operands[5]))
+ operands[6] = operands[2];
+
+ if (nparts == 3
+ && GET_CODE (operands[7]) == CONST_INT
+ && INTVAL (operands[7]) == INTVAL (operands[5]))
+ operands[7] = operands[2];
+ }
+
+ if (nparts == 3
+ && GET_CODE (operands[6]) == CONST_INT
+ && operands[6] != const0_rtx
+ && REG_P (operands[3])
+ && GET_CODE (operands[7]) == CONST_INT
+ && INTVAL (operands[7]) == INTVAL (operands[6]))
+ operands[7] = operands[3];
+ }
+
+ emit_move_insn (operands[2], operands[5]);
+ emit_move_insn (operands[3], operands[6]);
+ if (nparts == 3)
+ emit_move_insn (operands[4], operands[7]);
+
+ return;
+}
+
+/* Helper function of ix86_split_ashl used to generate an SImode/DImode
+ left shift by a constant, either using a single shift or
+ a sequence of add instructions. */
+
+static void
+ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
+{
+ if (count == 1)
+ {
+ emit_insn ((mode == DImode
+ ? gen_addsi3
+ : gen_adddi3) (operand, operand, operand));
+ }
+ else if (!optimize_size
+ && count * ix86_cost->add <= ix86_cost->shift_const)
+ {
+ int i;
+ for (i=0; i<count; i++)
+ {
+ emit_insn ((mode == DImode
+ ? gen_addsi3
+ : gen_adddi3) (operand, operand, operand));
+ }
+ }
+ else
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (operand, operand, GEN_INT (count)));
+}
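+
+/* The add-chain alternative chosen above when it is cheaper than a
+   shift by a constant: each add doubles the operand, so COUNT adds are
+   equivalent to one shift left by COUNT.  The hypothetical
+   sketch_ashl_by_adds is illustration only.  */
+
+static unsigned int
+sketch_ashl_by_adds (unsigned int x, int count)
+{
+  while (count-- > 0)
+    x += x;   /* add x, x  ==  shift left by one */
+  return x;
+}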
+
+void
+ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count >= single_width)
+ {
+ emit_move_insn (high[0], low[1]);
+ emit_move_insn (low[0], const0_rtx);
+
+ if (count > single_width)
+ ix86_expand_ashl_const (high[0], count - single_width, mode);
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shld_1
+ : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
+ ix86_expand_ashl_const (low[0], count, mode);
+ }
+ return;
+ }
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ if (operands[1] == const1_rtx)
+ {
+      /* Assuming we've chosen QImode-capable registers, 1 << N
+ can be done with two 32/64-bit shifts, no branches, no cmoves. */
+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
+ {
+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+
+ ix86_expand_clear (low[0]);
+ ix86_expand_clear (high[0]);
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
+
+ d = gen_lowpart (QImode, low[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+
+ d = gen_lowpart (QImode, high[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_NE (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ }
+
+ /* Otherwise, we can get the same results by manually performing
+ a bit extract operation on bit 5/6, and then performing the two
+ shifts. The two methods of getting 0/1 into low/high are exactly
+ the same size. Avoiding the shift in the bit extract case helps
+ pentium4 a bit; no one else seems to care much either way. */
+ else
+ {
+ rtx x;
+
+ if (TARGET_PARTIAL_REG_STALL && !optimize_size)
+ x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
+ else
+ x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
+
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
+ emit_insn ((mode == DImode
+ ? gen_andsi3
+ : gen_anddi3) (high[0], high[0], GEN_INT (1)));
+ emit_move_insn (low[0], high[0]);
+ emit_insn ((mode == DImode
+ ? gen_xorsi3
+ : gen_xordi3) (low[0], low[0], GEN_INT (1)));
+ }
+
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (low[0], low[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (high[0], high[0], operands[2]));
+ return;
+ }
+
+ if (operands[1] == constm1_rtx)
+ {
+ /* For -1 << N, we can avoid the shld instruction, because we
+ know that we're shifting 0...31/63 ones into a -1. */
+ emit_move_insn (low[0], constm1_rtx);
+ if (optimize_size)
+ emit_move_insn (high[0], low[0]);
+ else
+ emit_move_insn (high[0], constm1_rtx);
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+ emit_insn ((mode == DImode
+ ? gen_x86_shld_1
+ : gen_x86_64_shld) (high[0], low[0], operands[2]));
+ }
+
+ emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
+}
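+
+/* Scalar sketch of the double-word left shift assembled above from shld
+   plus a single-word shift, for a 64-bit value split into two 32-bit
+   halves and a count already reduced modulo 64.  The count >= 32 case
+   corresponds to the x86_shift_adj fix-up.  The hypothetical
+   sketch_split_ashl is illustration only.  */
+
+static void
+sketch_split_ashl (unsigned int *low, unsigned int *high, unsigned int count)
+{
+  if (count >= 32)
+    {
+      *high = *low << (count - 32);   /* adjustment: high = low, low = 0 */
+      *low = 0;
+    }
+  else if (count > 0)
+    {
+      *high = (*high << count) | (*low >> (32 - count));   /* shld */
+      *low <<= count;                                      /* shl  */
+    }
+}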
+
+void
+ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count == single_width * 2 - 1)
+ {
+ emit_move_insn (high[0], high[1]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0],
+ GEN_INT (single_width - 1)));
+ emit_move_insn (low[0], high[0]);
+
+ }
+ else if (count >= single_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ emit_move_insn (high[0], low[0]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0],
+ GEN_INT (single_width - 1)));
+ if (count > single_width)
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (low[0], low[0],
+ GEN_INT (count - single_width)));
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ emit_move_insn (scratch, high[0]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (scratch, scratch,
+ GEN_INT (single_width - 1)));
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
+ }
+}
+
+void
+ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count >= single_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ ix86_expand_clear (high[0]);
+
+ if (count > single_width)
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (low[0], low[0],
+ GEN_INT (count - single_width)));
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], operands[2]));
+
+ /* Heh. By reversing the arguments, we can reuse this pattern. */
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
+ }
+}
+
+/* Helper function for the string operations below.  Test whether VARIABLE
+   is aligned to VALUE bytes.  If it is, jump to the returned label.  */
+static rtx
+ix86_expand_aligntest (rtx variable, int value)
+{
+ rtx label = gen_label_rtx ();
+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
+ if (GET_MODE (variable) == DImode)
+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
+ else
+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
+ 1, label);
+ return label;
+}
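+
+/* The test emitted by the helper above, in scalar form: VALUE is a
+   single low bit (1, 2 or 4), and the callers skip their fix-up copy
+   when that bit of the address or count is clear.  The hypothetical
+   sketch_aligntest is illustration only.  */
+
+static int
+sketch_aligntest (unsigned long variable, int value)
+{
+  /* and $value, tmp ; je label  */
+  return (variable & value) == 0;
+}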
+
+/* Subtract VALUE from COUNTREG.  */
+static void
+ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
+{
+ if (GET_MODE (countreg) == DImode)
+ emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
+ else
+ emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
+}
+
+/* Zero extend possibly SImode EXP to Pmode register. */
+rtx
+ix86_zero_extend_to_Pmode (rtx exp)
+{
+ rtx r;
+ if (GET_MODE (exp) == VOIDmode)
+ return force_reg (Pmode, exp);
+ if (GET_MODE (exp) == Pmode)
+ return copy_to_mode_reg (Pmode, exp);
+ r = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendsidi2 (r, exp));
+ return r;
+}
+
+/* Expand string move (memcpy) operation. Use i386 string operations when
+ profitable. expand_clrmem contains similar code. */
+int
+ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
+{
+ rtx srcreg, destreg, countreg, srcexp, destexp;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+ /* Can't use any of this if the user has appropriated esi or edi. */
+ if (global_regs[4] || global_regs[5])
+ return 0;
+
+ /* This simple hack avoids all inlining code and simplifies code below. */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 64;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ {
+ count = INTVAL (count_exp);
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return 0;
+ }
+
+  /* Figure out the proper mode for the counter.  For 32-bit targets it is
+     always SImode; for 64-bit targets use SImode when possible, otherwise
+     DImode.  Set count to the number of bytes copied when it is known at
+     compile time.  */
+ if (!TARGET_64BIT
+ || GET_MODE (count_exp) == SImode
+ || x86_64_zext_immediate_operand (count_exp, VOIDmode))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ gcc_assert (counter_mode == SImode || counter_mode == DImode);
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ if (destreg != XEXP (dst, 0))
+ dst = replace_equiv_address_nv (dst, destreg);
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ if (srcreg != XEXP (src, 0))
+ src = replace_equiv_address_nv (src, srcreg);
+
+  /* When optimizing for size, emit a simple rep ; movsb instruction for
+     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
+     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
+     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
+     count / 4 + (count & 3) bytes, while the other sequence is either 4 or
+     7 bytes, but we don't know whether the upper 24 (resp. 56) bits of
+     %ecx are already known to be zero.  The rep; movsb sequence causes
+     higher register pressure though, so take that into account.  */
+
+ if ((!optimize || optimize_size)
+ && (count == 0
+ || ((count & 0x03)
+ && (!optimize_size
+ || count > 5 * 4
+ || (count & 3) + count / 4 > 6))))
+ {
+ emit_insn (gen_cld ());
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
+ srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
+ destexp, srcexp));
+ }
+
+ /* For constant aligned (or small unaligned) copies use rep movsl
+ followed by code copying the rest. For PentiumPro ensure 8 byte
+ alignment to allow rep movsl acceleration. */
+
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int) 64))
+ {
+ unsigned HOST_WIDE_INT offset = 0;
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ rtx srcmem, dstmem;
+
+ emit_insn (gen_cld ());
+ if (count & ~(size - 1))
+ {
+ if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
+ {
+ enum machine_mode movs_mode = size == 4 ? SImode : DImode;
+
+ while (offset < (count & ~(size - 1)))
+ {
+ srcmem = adjust_automodify_address_nv (src, movs_mode,
+ srcreg, offset);
+ dstmem = adjust_automodify_address_nv (dst, movs_mode,
+ destreg, offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += size;
+ }
+ }
+ else
+ {
+ countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff));
+ countreg = copy_to_mode_reg (counter_mode, countreg);
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (size == 4 ? 2 : 3));
+ srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+ countreg, destexp, srcexp));
+ offset = count & ~(size - 1);
+ }
+ }
+ if (size == 8 && (count & 0x04))
+ {
+ srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
+ offset);
+ dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
+ offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += 4;
+ }
+ if (count & 0x02)
+ {
+ srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
+ offset);
+ dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
+ offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += 2;
+ }
+ if (count & 0x01)
+ {
+ srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
+ offset);
+ dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
+ offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ }
+ /* The generic code based on the glibc implementation:
+ - align destination to 4 bytes (8 byte alignment is used for PentiumPro
+ allowing accelerated copying there)
+ - copy the data using rep movsl
+ - copy the rest. */
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+ rtx srcmem, dstmem;
+ int desired_alignment = (TARGET_PENTIUMPRO
+ && (count == 0 || count >= (unsigned int) 260)
+ ? 8 : UNITS_PER_WORD);
+ /* Get rid of MEM_OFFSETs, they won't be accurate. */
+ dst = change_address (dst, BLKmode, destreg);
+ src = change_address (src, BLKmode, srcreg);
+
+      /* If we don't know anything about the alignment, default to the
+	 library version, since it is usually equally fast and results in
+	 shorter code.
+
+	 Also emit the call when we know that the count is large and the
+	 call overhead will not be important.  */
+ if (!TARGET_INLINE_ALL_STRINGOPS
+ && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
+ return 0;
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+
+ /* We don't use loops to align destination and to copy parts smaller
+ than 4 bytes, because gcc is able to optimize such code better (in
+ the case the destination or the count really is aligned, gcc is often
+ able to predict the branches) and also it is friendlier to the
+ hardware branch prediction.
+
+	 Using loops is beneficial for the generic case, because we can
+ handle small counts using the loops. Many CPUs (such as Athlon)
+ have large REP prefix setup costs.
+
+ This is quite costly. Maybe we can revisit this decision later or
+ add some customizability to this code. */
+
+ if (count == 0 && align < desired_alignment)
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
+ LEU, 0, counter_mode, 1, label);
+ }
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ srcmem = change_address (src, QImode, srcreg);
+ dstmem = change_address (dst, QImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ srcmem = change_address (src, HImode, srcreg);
+ dstmem = change_address (dst, HImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ srcmem = change_address (src, SImode, srcreg);
+ dstmem = change_address (dst, SImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (label && desired_alignment > 4 && !TARGET_64BIT)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL_RTX;
+ }
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
+ }
+ srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+ countreg2, destexp, srcexp));
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ {
+ srcmem = change_address (src, SImode, srcreg);
+ dstmem = change_address (dst, SImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ if ((align <= 4 || count == 0) && TARGET_64BIT)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ srcmem = change_address (src, SImode, srcreg);
+ dstmem = change_address (dst, SImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ {
+ srcmem = change_address (src, HImode, srcreg);
+ dstmem = change_address (dst, HImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ srcmem = change_address (src, HImode, srcreg);
+ dstmem = change_address (dst, HImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ {
+ srcmem = change_address (src, QImode, srcreg);
+ dstmem = change_address (dst, QImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ srcmem = change_address (src, QImode, srcreg);
+ dstmem = change_address (dst, QImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+
+ return 1;
+}
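+
+/* Shape of the code the expander above produces for the generic case,
+   written as plain C: align the destination first, move the bulk with
+   word-sized copies (rep movsl), then finish the sub-word tail.  The
+   hypothetical sketch_movmem is illustration only, not the emitted RTL.  */
+
+static void
+sketch_movmem (unsigned char *dst, const unsigned char *src, unsigned long n)
+{
+  /* Prologue: byte copies until the destination is 4-byte aligned.  */
+  while (n && ((unsigned long) dst & 3))
+    {
+      *dst++ = *src++;
+      n--;
+    }
+  /* Body: the rep movsl equivalent.  */
+  while (n >= 4)
+    {
+      __builtin_memcpy (dst, src, 4);
+      dst += 4;
+      src += 4;
+      n -= 4;
+    }
+  /* Epilogue: the remaining 0-3 bytes.  */
+  while (n--)
+    *dst++ = *src++;
+}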
+
+/* Expand string clear operation (bzero). Use i386 string operations when
+ profitable. expand_movmem contains similar code. */
+int
+ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
+{
+ rtx destreg, zeroreg, countreg, destexp;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+ /* Can't use any of this if the user has appropriated esi. */
+ if (global_regs[4])
+ return 0;
+
+ /* This simple hack avoids all inlining code and simplifies code below. */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 32;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ {
+ count = INTVAL (count_exp);
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return 0;
+ }
+  /* Figure out the proper mode for the counter.  For 32-bit targets it is
+     always SImode; for 64-bit targets use SImode when possible, otherwise
+     DImode.  Set count to the number of bytes cleared when it is known at
+     compile time.  */
+ if (!TARGET_64BIT
+ || GET_MODE (count_exp) == SImode
+ || x86_64_zext_immediate_operand (count_exp, VOIDmode))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ if (destreg != XEXP (dst, 0))
+ dst = replace_equiv_address_nv (dst, destreg);
+
+
+  /* When optimizing for size, emit a simple rep ; stosb instruction for
+     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
+     sequence is 7 bytes long, so if optimizing for size and the count is
+     small enough that some stosl, stosw and stosb instructions without
+     rep are shorter, fall back to the next if.  */
+
+ if ((!optimize || optimize_size)
+ && (count == 0
+ || ((count & 0x03)
+ && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
+ {
+ emit_insn (gen_cld ());
+
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ zeroreg = copy_to_mode_reg (QImode, const0_rtx);
+ destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
+ emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
+ }
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int) 64))
+ {
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ unsigned HOST_WIDE_INT offset = 0;
+
+ emit_insn (gen_cld ());
+
+ zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
+ if (count & ~(size - 1))
+ {
+ unsigned HOST_WIDE_INT repcount;
+ unsigned int max_nonrep;
+
+ repcount = count >> (size == 4 ? 2 : 3);
+ if (!TARGET_64BIT)
+ repcount &= 0x3fffffff;
+
+ /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
+ movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
+ bytes. In both cases the latter seems to be faster for small
+ values of N. */
+ max_nonrep = size == 4 ? 7 : 4;
+ if (!optimize_size)
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_NOCONA:
+ max_nonrep = 3;
+ break;
+ default:
+ break;
+ }
+
+ if (repcount <= max_nonrep)
+ while (repcount-- > 0)
+ {
+ rtx mem = adjust_automodify_address_nv (dst,
+ GET_MODE (zeroreg),
+ destreg, offset);
+ emit_insn (gen_strset (destreg, mem, zeroreg));
+ offset += size;
+ }
+ else
+ {
+ countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (size == 4 ? 2 : 3));
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
+ destexp));
+ offset = count & ~(size - 1);
+ }
+ }
+ if (size == 8 && (count & 0x04))
+ {
+ rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
+ offset);
+ emit_insn (gen_strset (destreg, mem,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ offset += 4;
+ }
+ if (count & 0x02)
+ {
+ rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
+ offset);
+ emit_insn (gen_strset (destreg, mem,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ offset += 2;
+ }
+ if (count & 0x01)
+ {
+ rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
+ offset);
+ emit_insn (gen_strset (destreg, mem,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ }
+ }
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+ /* Compute desired alignment of the string operation. */
+ int desired_alignment = (TARGET_PENTIUMPRO
+ && (count == 0 || count >= (unsigned int) 260)
+ ? 8 : UNITS_PER_WORD);
+
+      /* If we don't know anything about the alignment, default to the
+	 library version, since it is usually equally fast and results in
+	 shorter code.
+
+	 Also emit the call when we know that the count is large and the
+	 call overhead will not be important.  */
+ if (!TARGET_INLINE_ALL_STRINGOPS
+ && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
+ return 0;
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+ zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
+ /* Get rid of MEM_OFFSET, it won't be accurate. */
+ dst = change_address (dst, BLKmode, destreg);
+
+ if (count == 0 && align < desired_alignment)
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
+ LEU, 0, counter_mode, 1, label);
+ }
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ emit_insn (gen_strset (destreg, dst,
+ (TARGET_64BIT
+ ? gen_rtx_SUBREG (SImode, zeroreg, 0)
+ : zeroreg)));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (label && desired_alignment > 4 && !TARGET_64BIT)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL_RTX;
+ }
+
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
+ }
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ if (TARGET_64BIT && (align <= 4 || count == 0))
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+ return 1;
+}
+
+/* Expand strlen. */
+int
+ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
+{
+ rtx addr, scratch1, scratch2, scratch3, scratch4;
+
+  /* The generic case of strlen expander is long.  Avoid expanding it
+     unless TARGET_INLINE_ALL_STRINGOPS.  */
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !TARGET_INLINE_ALL_STRINGOPS
+ && !optimize_size
+ && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
+ return 0;
+
+ addr = force_reg (Pmode, XEXP (src, 0));
+ scratch1 = gen_reg_rtx (Pmode);
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !optimize_size)
+ {
+      /* The optimizer apparently does not combine calls such as
+         foo(strlen(bar), strlen(bar));
+         when the move and the subtraction are done here, whereas it does
+         compute the length just once when those instructions are emitted
+         inside output_strlen_unroll().  But since &bar[strlen(bar)] is
+         often used, and this uses one fewer register for the lifetime of
+         output_strlen_unroll(), this is better.  */
+
+ emit_move_insn (out, addr);
+
+ ix86_expand_strlensi_unroll_1 (out, src, align);
+
+ /* strlensi_unroll_1 returns the address of the zero at the end of
+ the string, like memchr(), so compute the length by subtracting
+ the start address. */
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3 (out, out, addr));
+ else
+ emit_insn (gen_subsi3 (out, out, addr));
+ }
+ else
+ {
+ rtx unspec;
+ scratch2 = gen_reg_rtx (Pmode);
+ scratch3 = gen_reg_rtx (Pmode);
+ scratch4 = force_reg (Pmode, constm1_rtx);
+
+ emit_move_insn (scratch3, addr);
+ eoschar = force_reg (QImode, eoschar);
+
+ emit_insn (gen_cld ());
+ src = replace_equiv_address_nv (src, scratch3);
+
+ /* If .md starts supporting :P, this can be done in .md. */
+ unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
+ scratch4), UNSPEC_SCAS);
+ emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
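+      /* Illustrative note (not part of the original source): repnz scasb
+	 starts with the count register at -1 and decrements it once per
+	 byte scanned, including the terminating zero, so it finishes with
+	 count == -(len + 2).  The complement and add of -1 below recover
+	 len, since ~(-(len + 2)) - 1 == (len + 1) - 1 == len.  */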
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
+ emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
+ }
+ else
+ {
+ emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
+ emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
+ }
+ }
+ return 1;
+}
+
+/* Expand the appropriate insns for doing strlen if not just doing
+ repnz; scasb
+
+ out = result, initialized with the start address
+ align_rtx = alignment of the address.
+   scratch = scratch register, initialized with the start address when
+	not aligned, otherwise undefined
+
+   This is just the body.  It needs the initializations mentioned above
+   and some address computation at the end.  These things are done in
+   i386.md.  */
+
+static void
+ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
+{
+ int align;
+ rtx tmp;
+ rtx align_2_label = NULL_RTX;
+ rtx align_3_label = NULL_RTX;
+ rtx align_4_label = gen_label_rtx ();
+ rtx end_0_label = gen_label_rtx ();
+ rtx mem;
+ rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx cmp;
+
+ align = 0;
+ if (GET_CODE (align_rtx) == CONST_INT)
+ align = INTVAL (align_rtx);
+
+ /* Loop to check 1..3 bytes for null to get an aligned pointer. */
+
+ /* Is there a known alignment and is it less than 4? */
+ if (align < 4)
+ {
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
+ /* Is there a known alignment and is it not 2? */
+ if (align != 2)
+ {
+ align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
+ align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
+
+ /* Leave just the 3 lower bits. */
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
+ Pmode, 1, align_2_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
+ Pmode, 1, align_3_label);
+ }
+ else
+ {
+	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
+	     check whether it is aligned to a 4-byte boundary.  */
+
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ }
+
+ mem = change_address (src, QImode, out);
+
+ /* Now compare the bytes. */
+
+      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
+ QImode, 1, end_0_label);
+
+ /* Increment the address. */
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+
+ /* Not needed with an alignment of 2 */
+ if (align != 2)
+ {
+ emit_label (align_2_label);
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+
+ emit_label (align_3_label);
+ }
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+ }
+
+  /* Generate the loop that checks 4 bytes at a time.  Aligning this loop
+     is not a good idea: it only makes programs larger and does not speed
+     them up.  */
+ emit_label (align_4_label);
+
+ mem = change_address (src, SImode, out);
+ emit_move_insn (scratch, mem);
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
+ else
+ emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
+
+  /* This formula yields a nonzero result iff one of the bytes is zero.
+     This saves three branches inside the loop and many cycles.  */
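+  /* Illustrative note (not part of the original source): in C terms the
+     insns emitted below compute
+       tmpreg = ((scratch - 0x01010101) & ~scratch) & 0x80808080;
+     which is nonzero iff some byte of SCRATCH is zero.  For example,
+     scratch == 0x12003456 gives 0x10ff3355 & 0xedffcba9 & 0x80808080
+     == 0x00800000, flagging the zero in the third byte of the word.  */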
+
+ emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
+ emit_insn (gen_one_cmplsi2 (scratch, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg,
+ gen_int_mode (0x80808080, SImode)));
+ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
+ align_4_label);
+
+ if (TARGET_CMOVE)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, tmpreg);
+ emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
+
+ /* If zero is not in the first two bytes, move two bytes forward. */
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
+ gen_rtx_IF_THEN_ELSE (SImode, tmp,
+ reg,
+ tmpreg)));
+ /* Emit lea manually to avoid clobbering of flags. */
+ emit_insn (gen_rtx_SET (SImode, reg2,
+ gen_rtx_PLUS (Pmode, out, const2_rtx)));
+
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, out,
+ gen_rtx_IF_THEN_ELSE (Pmode, tmp,
+ reg2,
+ out)));
+
+ }
+ else
+ {
+ rtx end_2_label = gen_label_rtx ();
+ /* Is zero in the first two bytes? */
+
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, end_2_label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = end_2_label;
+
+ /* Not in the first two. Move two bytes forward. */
+ emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const2_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const2_rtx));
+
+ emit_label (end_2_label);
+
+ }
+
+ /* Avoid branch in fixing the byte. */
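+  /* Illustrative note (not part of the original source): at this point
+     OUT is 4 bytes past the start of the word containing the zero, or 6
+     if the zero was only in the upper half, and bit 7 of TMPREG's low
+     byte is set exactly when the zero is the lower byte of the remaining
+     pair.  Adding TMPREG to itself copies that bit into the carry flag,
+     so the subtract-with-borrow below computes OUT -= 3 + carry, leaving
+     OUT pointing at the terminating zero byte without a branch.  */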
+ tmpreg = gen_lowpart (QImode, tmpreg);
+ emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
+ else
+ emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
+
+ emit_label (end_0_label);
+}
+
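+/* Expand a call.  RETVAL, if non-NULL, receives the value returned by the
+   call; FNADDR is a MEM giving the call target, CALLARG1 becomes the second
+   operand of the CALL rtx, POP (if non-NULL) is the amount added to the
+   stack pointer after the call, and SIBCALL is nonzero for sibling calls.  */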
+void
+ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
+ rtx callarg2 ATTRIBUTE_UNUSED,
+ rtx pop, int sibcall)
+{
+ rtx use = NULL, call;
+
+ if (pop == const0_rtx)
+ pop = NULL;
+ gcc_assert (!TARGET_64BIT || !pop);
+
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ fnaddr = machopic_indirect_call_target (fnaddr);
+#endif
+ }
+ else
+ {
+ /* Static functions and indirect calls don't need the pic register. */
+ if (! TARGET_64BIT && flag_pic
+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+ use_reg (&use, pic_offset_table_rtx);
+ }
+
+ if (TARGET_64BIT && INTVAL (callarg2) >= 0)
+ {
+ rtx al = gen_rtx_REG (QImode, 0);
+ emit_move_insn (al, callarg2);
+ use_reg (&use, al);
+ }
+
+ if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
+ if (sibcall && TARGET_64BIT
+ && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ rtx addr;
+ addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
+ emit_move_insn (fnaddr, addr);
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
+
+ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+ if (retval)
+ call = gen_rtx_SET (VOIDmode, retval, call);
+ if (pop)
+ {
+ pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
+ pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ }
+
+ call = emit_call_insn (call);
+ if (use)
+ CALL_INSN_FUNCTION_USAGE (call) = use;
+}
+
+
+/* Clear stack slot assignments remembered from previous functions.
+ This is called from INIT_EXPANDERS once before RTL is emitted for each
+ function. */
+
+static struct machine_function *
+ix86_init_machine_status (void)
+{
+ struct machine_function *f;
+
+ f = ggc_alloc_cleared (sizeof (struct machine_function));
+ f->use_fast_prologue_epilogue_nregs = -1;
+ f->tls_descriptor_call_expanded_p = 0;
+
+ return f;
+}
+
+/* Return a MEM corresponding to a stack slot with mode MODE.
+ Allocate a new slot if necessary.
+
+ The RTL for a function can have several slots available: N is
+ which slot to use. */
+
+rtx
+assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
+{
+ struct stack_local_entry *s;
+
+ gcc_assert (n < MAX_386_STACK_LOCALS);
+
+ /* Virtual slot is valid only before vregs are instantiated. */
+ gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
+
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->mode == mode && s->n == n)
+ return s->rtl;
+
+ s = (struct stack_local_entry *)
+ ggc_alloc (sizeof (struct stack_local_entry));
+ s->n = n;
+ s->mode = mode;
+ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+
+ s->next = ix86_stack_locals;
+ ix86_stack_locals = s;
+ return s->rtl;
+}
+
+/* Construct the SYMBOL_REF for the tls_get_addr function. */
+
+static GTY(()) rtx ix86_tls_symbol;
+rtx
+ix86_tls_get_addr (void)
+{
+
+ if (!ix86_tls_symbol)
+ {
+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ (TARGET_ANY_GNU_TLS
+ && !TARGET_64BIT)
+ ? "___tls_get_addr"
+ : "__tls_get_addr");
+ }
+
+ return ix86_tls_symbol;
+}
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx ix86_tls_module_base_symbol;
+rtx
+ix86_tls_module_base (void)
+{
+
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
+}
+
+/* Calculate the length of the memory address in the instruction
+ encoding. Does not include the one-byte modrm, opcode, or prefix. */
+
+int
+memory_address_length (rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int len;
+ int ok;
+
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC
+ || GET_CODE (addr) == PRE_MODIFY
+ || GET_CODE (addr) == POST_MODIFY)
+ return 0;
+
+ ok = ix86_decompose_address (addr, &parts);
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ len = 0;
+
+ /* Rule of thumb:
+ - esp as the base always wants an index,
+ - ebp as the base always wants a displacement. */
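+  /* Illustrative note (not part of the original source): the length
+     returned counts only the SIB and displacement bytes, e.g.
+       (%eax)          -> 0
+       (%esp)          -> 1  (SIB byte)
+       8(%ebp)         -> 1  (disp8)
+       8(%ebx,%esi,4)  -> 2  (SIB byte + disp8)
+       0x12345678      -> 4  (disp32)  */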
+
+ /* Register Indirect. */
+ if (base && !index && !disp)
+ {
+ /* esp (for its index) and ebp (for its displacement) need
+ the two-byte modrm form. */
+ if (addr == stack_pointer_rtx
+ || addr == arg_pointer_rtx
+ || addr == frame_pointer_rtx
+ || addr == hard_frame_pointer_rtx)
+ len = 1;
+ }
+
+ /* Direct Addressing. */
+ else if (disp && !base && !index)
+ len = 4;
+
+ else
+ {
+ /* Find the length of the displacement constant. */
+ if (disp)
+ {
+ if (base && satisfies_constraint_K (disp))
+ len = 1;
+ else
+ len = 4;
+ }
+ /* ebp always wants a displacement. */
+ else if (base == hard_frame_pointer_rtx)
+ len = 1;
+
+ /* An index requires the two-byte modrm form.... */
+ if (index
+ /* ...like esp, which always wants an index. */
+ || base == stack_pointer_rtx
+ || base == arg_pointer_rtx
+ || base == frame_pointer_rtx)
+ len += 1;
+ }
+
+ return len;
+}
+
+/* Compute default value for "length_immediate" attribute.  When SHORTFORM
+   is set, expect that the insn has an 8-bit immediate alternative.  */
+int
+ix86_attr_length_immediate_default (rtx insn, int shortform)
+{
+ int len = 0;
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (CONSTANT_P (recog_data.operand[i]))
+ {
+ gcc_assert (!len);
+ if (shortform && satisfies_constraint_K (recog_data.operand[i]))
+ len = 1;
+ else
+ {
+ switch (get_attr_mode (insn))
+ {
+	      case MODE_QI:
+		len += 1;
+		break;
+	      case MODE_HI:
+		len += 2;
+		break;
+	      case MODE_SI:
+		len += 4;
+		break;
+	      /* Immediates for DImode instructions are encoded as
+		 32-bit sign-extended values.  */
+	      case MODE_DI:
+		len += 4;
+		break;
+ default:
+ fatal_insn ("unknown insn mode", insn);
+ }
+ }
+ }
+ return len;
+}
+/* Compute default value for "length_address" attribute. */
+int
+ix86_attr_length_address_default (rtx insn)
+{
+ int i;
+
+ if (get_attr_type (insn) == TYPE_LEA)
+ {
+ rtx set = PATTERN (insn);
+
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+
+ gcc_assert (GET_CODE (set) == SET);
+
+ return memory_address_length (SET_SRC (set));
+ }
+
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+    if (GET_CODE (recog_data.operand[i]) == MEM)
+      return memory_address_length (XEXP (recog_data.operand[i], 0));
+ return 0;
+}
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+static int
+ix86_issue_rate (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ case PROCESSOR_K6:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_NOCONA:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ return 3;
+ /* APPLE LOCAL begin mainline */
+ case PROCESSOR_CORE2:
+ return 4;
+ /* APPLE LOCAL end mainline */
+
+ default:
+ return 1;
+ }
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
+   set by DEP_INSN and nothing else that DEP_INSN sets.  */
+
+static int
+ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx set, set2;
+
+ /* Simplify the test for uninteresting insns. */
+ if (insn_type != TYPE_SETCC
+ && insn_type != TYPE_ICMOV
+ && insn_type != TYPE_FCMOV
+ && insn_type != TYPE_IBR)
+ return 0;
+
+ if ((set = single_set (dep_insn)) != 0)
+ {
+ set = SET_DEST (set);
+ set2 = NULL_RTX;
+ }
+ else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
+ && XVECLEN (PATTERN (dep_insn), 0) == 2
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
+ {
+ set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
+      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
+ }
+ else
+ return 0;
+
+ if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
+ return 0;
+
+ /* This test is true if the dependent insn reads the flags but
+ not any other potentially set register. */
+ if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
+ return 0;
+
+ if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
+ return 0;
+
+ return 1;
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
+ address with operands set by DEP_INSN. */
+
+static int
+ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx addr;
+
+ if (insn_type == TYPE_LEA
+ && TARGET_PENTIUM)
+ {
+ addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ }
+ else
+ {
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (GET_CODE (recog_data.operand[i]) == MEM)
+ {
+ addr = XEXP (recog_data.operand[i], 0);
+ goto found;
+ }
+ return 0;
+ found:;
+ }
+
+ return modified_in_p (addr, dep_insn);
+}
+
+static int
+ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type insn_type, dep_insn_type;
+ enum attr_memory memory;
+ rtx set, set2;
+ int dep_insn_code_number;
+
+ /* Anti and output dependencies have zero cost on all CPUs. */
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+
+ dep_insn_code_number = recog_memoized (dep_insn);
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_insn_type = get_attr_type (dep_insn);
+
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ /* Address Generation Interlock adds a cycle of latency. */
+ if (ix86_agi_dependent (insn, dep_insn, insn_type))
+ cost += 1;
+
+ /* ??? Compares pair with jump/setcc. */
+ if (ix86_flags_dependent (insn, dep_insn, insn_type))
+ cost = 0;
+
+ /* Floating point stores require value to be ready one cycle earlier. */
+ if (insn_type == TYPE_FMOV
+ && get_attr_memory (insn) == MEMORY_STORE
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ cost += 1;
+ break;
+
+ case PROCESSOR_PENTIUMPRO:
+ memory = get_attr_memory (insn);
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* There is one cycle extra latency between an FP op and a store. */
+ if (insn_type == TYPE_FMOV
+ && (set = single_set (dep_insn)) != NULL_RTX
+ && (set2 = single_set (insn)) != NULL_RTX
+ && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
+ && GET_CODE (SET_DEST (set2)) == MEM)
+ cost += 1;
+
+      /* Model the reorder buffer's ability to hide the latency of a load
+	 by executing it in parallel with the previous instruction when the
+	 previous instruction is not needed to compute the address.  */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+	  /* Claim that moves take one cycle, as the core can issue one load
+	     at a time and the next load can start a cycle later.  */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 1)
+ cost--;
+ }
+ break;
+
+ case PROCESSOR_K6:
+ memory = get_attr_memory (insn);
+
+ /* The esp dependency is resolved before the instruction is really
+ finished. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 1;
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+      /* Model the reorder buffer's ability to hide the latency of a load
+	 by executing it in parallel with the previous instruction when the
+	 previous instruction is not needed to compute the address.  */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+	  /* Claim that moves take one cycle, as the core can issue one load
+	     at a time and the next load can start a cycle later.  */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 2)
+ cost -= 2;
+ else
+ cost = 1;
+ }
+ break;
+
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ memory = get_attr_memory (insn);
+
+      /* Model the reorder buffer's ability to hide the latency of a load
+	 by executing it in parallel with the previous instruction when the
+	 previous instruction is not needed to compute the address.  */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ enum attr_unit unit = get_attr_unit (insn);
+ int loadcost = 3;
+
+	  /* Because of the difference in length between the integer and
+	     floating-point unit pipeline preparation stages, memory operands
+	     for floating point are cheaper.
+
+	     ??? For Athlon the difference is most probably 2.  */
+ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+ loadcost = 3;
+ else
+ loadcost = TARGET_ATHLON ? 2 : 0;
+
+ if (cost >= loadcost)
+ cost -= loadcost;
+ else
+ cost = 0;
+ }
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
+/* How many alternative schedules to try.  This should be as wide as the
+   scheduling freedom in the DFA, but no wider.  Making this value too
+   large results in extra work for the scheduler.  */
+
+static int
+ia32_multipass_dfa_lookahead (void)
+{
+ if (ix86_tune == PROCESSOR_PENTIUM)
+ return 2;
+
+ if (ix86_tune == PROCESSOR_PENTIUMPRO
+ || ix86_tune == PROCESSOR_K6)
+ return 1;
+
+ else
+ return 0;
+}
+
+
+/* Compute the alignment given to a constant that is being placed in memory.
+ EXP is the constant and ALIGN is the alignment that the object would
+ ordinarily have.
+ The value of this function is used instead of that alignment to align
+ the object. */
+
+int
+ix86_constant_alignment (tree exp, int align)
+{
+ if (TREE_CODE (exp) == REAL_CST)
+ {
+ if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
+ return 64;
+ else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
+ return 128;
+ }
+ else if (!optimize_size && TREE_CODE (exp) == STRING_CST
+ && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
+ return BITS_PER_WORD;
+
+/* APPLE LOCAL begin 4090661 */
+#if TARGET_MACHO
+ /* Without this, static arrays initialized to strings get aligned
+ to 32 bytes. These go in cstring, so would result in a lot of extra
+ padding in files with a couple of small strings. 4090661. */
+ else if (TREE_CODE (exp) == STRING_CST)
+ {
+ if (TREE_STRING_LENGTH (exp) >= 31 && !optimize_size)
+ return BITS_PER_WORD;
+ else
+ return 8;
+ }
+#endif
+/* APPLE LOCAL end 4090661 */
+ return align;
+}
+
+/* Compute the alignment for a static variable.
+ TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this function is used
+ instead of that alignment to align the object. */
+
+int
+ix86_data_alignment (tree type, int align)
+{
+ int max_align = optimize_size ? BITS_PER_WORD : 256;
+
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
+ && align < max_align)
+ align = max_align;
+
+ /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
+     to a 16-byte boundary.  */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if (TYPE_MODE (type) == XCmode && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+
+ return align;
+}
+
+/* Compute the alignment for a local variable.
+ TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object. */
+
+int
+ix86_local_alignment (tree type, int align)
+{
+ /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
+     to a 16-byte boundary.  */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if (TYPE_MODE (type) == XCmode && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+ return align;
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+void
+x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
+{
+ if (!TARGET_64BIT)
+ {
+ /* Compute offset from the end of the jmp to the target function. */
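+      /* Illustrative note (not part of the original source): the ten
+	 bytes stored below are
+	   b9 <cxt:4>     movl  $CXT, %ecx
+	   e9 <rel32:4>   jmp   FNADDR
+	 where rel32 is taken relative to the end of the jmp, TRAMP + 10.  */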
+ rtx disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (tramp, 10),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ emit_move_insn (gen_rtx_MEM (QImode, tramp),
+ gen_int_mode (0xb9, QImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
+ gen_int_mode (0xe9, QImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
+ }
+ else
+ {
+ int offset = 0;
+      /* Try to load the address using the shorter movl instead of movabs.
+         We may want to support movq for kernel mode, but the kernel does
+         not use trampolines at the moment.  */
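+      /* Illustrative note (not part of the original source): the HImode
+	 constants stored below are little-endian byte pairs, so 0xbb41 is
+	 "41 bb" (movl $imm32, %r11d), 0xbb49 is "49 bb" (movabs $imm64,
+	 %r11), 0xba49 is "49 ba" (movabs $imm64, %r10), and 0xff49
+	 followed by 0xe3 is "49 ff e3" (jmp *%r11).  */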
+ if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
+ {
+ fnaddr = copy_to_mode_reg (DImode, fnaddr);
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xbb41, HImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
+ gen_lowpart (SImode, fnaddr));
+ offset += 6;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xbb49, HImode));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ fnaddr);
+ offset += 10;
+ }
+ /* Load static chain using movabs to r10. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xba49, HImode));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ cxt);
+ offset += 10;
+      /* Jump to r11.  */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xff49, HImode));
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
+ gen_int_mode (0xe3, QImode));
+ offset += 3;
+ gcc_assert (offset <= TRAMPOLINE_SIZE);
+ }
+
+#ifdef ENABLE_EXECUTE_STACK
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
+#endif
+}
+
+/* Codes for all the SSE/MMX builtins. */
+enum ix86_builtins
+{
+ IX86_BUILTIN_ADDPS,
+ IX86_BUILTIN_ADDSS,
+ IX86_BUILTIN_DIVPS,
+ IX86_BUILTIN_DIVSS,
+ IX86_BUILTIN_MULPS,
+ IX86_BUILTIN_MULSS,
+ IX86_BUILTIN_SUBPS,
+ IX86_BUILTIN_SUBSS,
+
+ IX86_BUILTIN_CMPEQPS,
+ IX86_BUILTIN_CMPLTPS,
+ IX86_BUILTIN_CMPLEPS,
+ IX86_BUILTIN_CMPGTPS,
+ IX86_BUILTIN_CMPGEPS,
+ IX86_BUILTIN_CMPNEQPS,
+ IX86_BUILTIN_CMPNLTPS,
+ IX86_BUILTIN_CMPNLEPS,
+ IX86_BUILTIN_CMPNGTPS,
+ IX86_BUILTIN_CMPNGEPS,
+ IX86_BUILTIN_CMPORDPS,
+ IX86_BUILTIN_CMPUNORDPS,
+ IX86_BUILTIN_CMPEQSS,
+ IX86_BUILTIN_CMPLTSS,
+ IX86_BUILTIN_CMPLESS,
+ IX86_BUILTIN_CMPNEQSS,
+ IX86_BUILTIN_CMPNLTSS,
+ IX86_BUILTIN_CMPNLESS,
+ IX86_BUILTIN_CMPNGTSS,
+ IX86_BUILTIN_CMPNGESS,
+ IX86_BUILTIN_CMPORDSS,
+ IX86_BUILTIN_CMPUNORDSS,
+
+ IX86_BUILTIN_COMIEQSS,
+ IX86_BUILTIN_COMILTSS,
+ IX86_BUILTIN_COMILESS,
+ IX86_BUILTIN_COMIGTSS,
+ IX86_BUILTIN_COMIGESS,
+ IX86_BUILTIN_COMINEQSS,
+ IX86_BUILTIN_UCOMIEQSS,
+ IX86_BUILTIN_UCOMILTSS,
+ IX86_BUILTIN_UCOMILESS,
+ IX86_BUILTIN_UCOMIGTSS,
+ IX86_BUILTIN_UCOMIGESS,
+ IX86_BUILTIN_UCOMINEQSS,
+
+ IX86_BUILTIN_CVTPI2PS,
+ IX86_BUILTIN_CVTPS2PI,
+ IX86_BUILTIN_CVTSI2SS,
+ IX86_BUILTIN_CVTSI642SS,
+ IX86_BUILTIN_CVTSS2SI,
+ IX86_BUILTIN_CVTSS2SI64,
+ IX86_BUILTIN_CVTTPS2PI,
+ IX86_BUILTIN_CVTTSS2SI,
+ IX86_BUILTIN_CVTTSS2SI64,
+
+ IX86_BUILTIN_MAXPS,
+ IX86_BUILTIN_MAXSS,
+ IX86_BUILTIN_MINPS,
+ IX86_BUILTIN_MINSS,
+
+ IX86_BUILTIN_LOADUPS,
+ IX86_BUILTIN_STOREUPS,
+ IX86_BUILTIN_MOVSS,
+
+ IX86_BUILTIN_MOVHLPS,
+ IX86_BUILTIN_MOVLHPS,
+ IX86_BUILTIN_LOADHPS,
+ IX86_BUILTIN_LOADLPS,
+ IX86_BUILTIN_STOREHPS,
+ IX86_BUILTIN_STORELPS,
+
+ IX86_BUILTIN_MASKMOVQ,
+ IX86_BUILTIN_MOVMSKPS,
+ IX86_BUILTIN_PMOVMSKB,
+
+ IX86_BUILTIN_MOVNTPS,
+ IX86_BUILTIN_MOVNTQ,
+
+ IX86_BUILTIN_LOADDQU,
+ IX86_BUILTIN_STOREDQU,
+
+ IX86_BUILTIN_PACKSSWB,
+ IX86_BUILTIN_PACKSSDW,
+ IX86_BUILTIN_PACKUSWB,
+
+ IX86_BUILTIN_PADDB,
+ IX86_BUILTIN_PADDW,
+ IX86_BUILTIN_PADDD,
+ IX86_BUILTIN_PADDQ,
+ IX86_BUILTIN_PADDSB,
+ IX86_BUILTIN_PADDSW,
+ IX86_BUILTIN_PADDUSB,
+ IX86_BUILTIN_PADDUSW,
+ IX86_BUILTIN_PSUBB,
+ IX86_BUILTIN_PSUBW,
+ IX86_BUILTIN_PSUBD,
+ IX86_BUILTIN_PSUBQ,
+ IX86_BUILTIN_PSUBSB,
+ IX86_BUILTIN_PSUBSW,
+ IX86_BUILTIN_PSUBUSB,
+ IX86_BUILTIN_PSUBUSW,
+
+ IX86_BUILTIN_PAND,
+ IX86_BUILTIN_PANDN,
+ IX86_BUILTIN_POR,
+ IX86_BUILTIN_PXOR,
+
+ IX86_BUILTIN_PAVGB,
+ IX86_BUILTIN_PAVGW,
+
+ IX86_BUILTIN_PCMPEQB,
+ IX86_BUILTIN_PCMPEQW,
+ IX86_BUILTIN_PCMPEQD,
+ IX86_BUILTIN_PCMPGTB,
+ IX86_BUILTIN_PCMPGTW,
+ IX86_BUILTIN_PCMPGTD,
+
+ IX86_BUILTIN_PMADDWD,
+
+ IX86_BUILTIN_PMAXSW,
+ IX86_BUILTIN_PMAXUB,
+ IX86_BUILTIN_PMINSW,
+ IX86_BUILTIN_PMINUB,
+
+ IX86_BUILTIN_PMULHUW,
+ IX86_BUILTIN_PMULHW,
+ IX86_BUILTIN_PMULLW,
+
+ IX86_BUILTIN_PSADBW,
+ IX86_BUILTIN_PSHUFW,
+
+ IX86_BUILTIN_PSLLW,
+ IX86_BUILTIN_PSLLD,
+ IX86_BUILTIN_PSLLQ,
+ IX86_BUILTIN_PSRAW,
+ IX86_BUILTIN_PSRAD,
+ IX86_BUILTIN_PSRLW,
+ IX86_BUILTIN_PSRLD,
+ IX86_BUILTIN_PSRLQ,
+ IX86_BUILTIN_PSLLWI,
+ IX86_BUILTIN_PSLLDI,
+ IX86_BUILTIN_PSLLQI,
+ IX86_BUILTIN_PSRAWI,
+ IX86_BUILTIN_PSRADI,
+ IX86_BUILTIN_PSRLWI,
+ IX86_BUILTIN_PSRLDI,
+ IX86_BUILTIN_PSRLQI,
+
+ IX86_BUILTIN_PUNPCKHBW,
+ IX86_BUILTIN_PUNPCKHWD,
+ IX86_BUILTIN_PUNPCKHDQ,
+ IX86_BUILTIN_PUNPCKLBW,
+ IX86_BUILTIN_PUNPCKLWD,
+ IX86_BUILTIN_PUNPCKLDQ,
+
+ IX86_BUILTIN_SHUFPS,
+
+ IX86_BUILTIN_RCPPS,
+ IX86_BUILTIN_RCPSS,
+ IX86_BUILTIN_RSQRTPS,
+ IX86_BUILTIN_RSQRTSS,
+ IX86_BUILTIN_SQRTPS,
+ IX86_BUILTIN_SQRTSS,
+
+ IX86_BUILTIN_UNPCKHPS,
+ IX86_BUILTIN_UNPCKLPS,
+
+ IX86_BUILTIN_ANDPS,
+ IX86_BUILTIN_ANDNPS,
+ IX86_BUILTIN_ORPS,
+ IX86_BUILTIN_XORPS,
+
+ IX86_BUILTIN_EMMS,
+ IX86_BUILTIN_LDMXCSR,
+ IX86_BUILTIN_STMXCSR,
+ IX86_BUILTIN_SFENCE,
+
+ /* 3DNow! Original */
+ IX86_BUILTIN_FEMMS,
+ IX86_BUILTIN_PAVGUSB,
+ IX86_BUILTIN_PF2ID,
+ IX86_BUILTIN_PFACC,
+ IX86_BUILTIN_PFADD,
+ IX86_BUILTIN_PFCMPEQ,
+ IX86_BUILTIN_PFCMPGE,
+ IX86_BUILTIN_PFCMPGT,
+ IX86_BUILTIN_PFMAX,
+ IX86_BUILTIN_PFMIN,
+ IX86_BUILTIN_PFMUL,
+ IX86_BUILTIN_PFRCP,
+ IX86_BUILTIN_PFRCPIT1,
+ IX86_BUILTIN_PFRCPIT2,
+ IX86_BUILTIN_PFRSQIT1,
+ IX86_BUILTIN_PFRSQRT,
+ IX86_BUILTIN_PFSUB,
+ IX86_BUILTIN_PFSUBR,
+ IX86_BUILTIN_PI2FD,
+ IX86_BUILTIN_PMULHRW,
+
+ /* 3DNow! Athlon Extensions */
+ IX86_BUILTIN_PF2IW,
+ IX86_BUILTIN_PFNACC,
+ IX86_BUILTIN_PFPNACC,
+ IX86_BUILTIN_PI2FW,
+ IX86_BUILTIN_PSWAPDSI,
+ IX86_BUILTIN_PSWAPDSF,
+
+ /* SSE2 */
+ IX86_BUILTIN_ADDPD,
+ IX86_BUILTIN_ADDSD,
+ IX86_BUILTIN_DIVPD,
+ IX86_BUILTIN_DIVSD,
+ IX86_BUILTIN_MULPD,
+ IX86_BUILTIN_MULSD,
+ IX86_BUILTIN_SUBPD,
+ IX86_BUILTIN_SUBSD,
+
+ IX86_BUILTIN_CMPEQPD,
+ IX86_BUILTIN_CMPLTPD,
+ IX86_BUILTIN_CMPLEPD,
+ IX86_BUILTIN_CMPGTPD,
+ IX86_BUILTIN_CMPGEPD,
+ IX86_BUILTIN_CMPNEQPD,
+ IX86_BUILTIN_CMPNLTPD,
+ IX86_BUILTIN_CMPNLEPD,
+ IX86_BUILTIN_CMPNGTPD,
+ IX86_BUILTIN_CMPNGEPD,
+ IX86_BUILTIN_CMPORDPD,
+ IX86_BUILTIN_CMPUNORDPD,
+ IX86_BUILTIN_CMPNEPD,
+ IX86_BUILTIN_CMPEQSD,
+ IX86_BUILTIN_CMPLTSD,
+ IX86_BUILTIN_CMPLESD,
+ IX86_BUILTIN_CMPNEQSD,
+ IX86_BUILTIN_CMPNLTSD,
+ IX86_BUILTIN_CMPNLESD,
+ IX86_BUILTIN_CMPORDSD,
+ IX86_BUILTIN_CMPUNORDSD,
+ IX86_BUILTIN_CMPNESD,
+
+ IX86_BUILTIN_COMIEQSD,
+ IX86_BUILTIN_COMILTSD,
+ IX86_BUILTIN_COMILESD,
+ IX86_BUILTIN_COMIGTSD,
+ IX86_BUILTIN_COMIGESD,
+ IX86_BUILTIN_COMINEQSD,
+ IX86_BUILTIN_UCOMIEQSD,
+ IX86_BUILTIN_UCOMILTSD,
+ IX86_BUILTIN_UCOMILESD,
+ IX86_BUILTIN_UCOMIGTSD,
+ IX86_BUILTIN_UCOMIGESD,
+ IX86_BUILTIN_UCOMINEQSD,
+
+ IX86_BUILTIN_MAXPD,
+ IX86_BUILTIN_MAXSD,
+ IX86_BUILTIN_MINPD,
+ IX86_BUILTIN_MINSD,
+
+ IX86_BUILTIN_ANDPD,
+ IX86_BUILTIN_ANDNPD,
+ IX86_BUILTIN_ORPD,
+ IX86_BUILTIN_XORPD,
+
+ IX86_BUILTIN_SQRTPD,
+ IX86_BUILTIN_SQRTSD,
+
+ IX86_BUILTIN_UNPCKHPD,
+ IX86_BUILTIN_UNPCKLPD,
+
+ IX86_BUILTIN_SHUFPD,
+
+ IX86_BUILTIN_LOADUPD,
+ IX86_BUILTIN_STOREUPD,
+ IX86_BUILTIN_MOVSD,
+
+ IX86_BUILTIN_LOADHPD,
+ IX86_BUILTIN_LOADLPD,
+
+ IX86_BUILTIN_CVTDQ2PD,
+ IX86_BUILTIN_CVTDQ2PS,
+
+ IX86_BUILTIN_CVTPD2DQ,
+ IX86_BUILTIN_CVTPD2PI,
+ IX86_BUILTIN_CVTPD2PS,
+ IX86_BUILTIN_CVTTPD2DQ,
+ IX86_BUILTIN_CVTTPD2PI,
+
+ IX86_BUILTIN_CVTPI2PD,
+ IX86_BUILTIN_CVTSI2SD,
+ IX86_BUILTIN_CVTSI642SD,
+
+ IX86_BUILTIN_CVTSD2SI,
+ IX86_BUILTIN_CVTSD2SI64,
+ IX86_BUILTIN_CVTSD2SS,
+ IX86_BUILTIN_CVTSS2SD,
+ IX86_BUILTIN_CVTTSD2SI,
+ IX86_BUILTIN_CVTTSD2SI64,
+
+ IX86_BUILTIN_CVTPS2DQ,
+ IX86_BUILTIN_CVTPS2PD,
+ IX86_BUILTIN_CVTTPS2DQ,
+
+ IX86_BUILTIN_MOVNTI,
+ IX86_BUILTIN_MOVNTPD,
+ IX86_BUILTIN_MOVNTDQ,
+
+ /* SSE2 MMX */
+ IX86_BUILTIN_MASKMOVDQU,
+ IX86_BUILTIN_MOVMSKPD,
+ IX86_BUILTIN_PMOVMSKB128,
+
+ /* APPLE LOCAL begin 4099020 */
+ IX86_BUILTIN_MOVQ,
+ IX86_BUILTIN_LOADQ,
+ IX86_BUILTIN_STOREQ,
+ /* APPLE LOCAL end 4099020 */
+
+ IX86_BUILTIN_PACKSSWB128,
+ IX86_BUILTIN_PACKSSDW128,
+ IX86_BUILTIN_PACKUSWB128,
+
+ IX86_BUILTIN_PADDB128,
+ IX86_BUILTIN_PADDW128,
+ IX86_BUILTIN_PADDD128,
+ IX86_BUILTIN_PADDQ128,
+ IX86_BUILTIN_PADDSB128,
+ IX86_BUILTIN_PADDSW128,
+ IX86_BUILTIN_PADDUSB128,
+ IX86_BUILTIN_PADDUSW128,
+ IX86_BUILTIN_PSUBB128,
+ IX86_BUILTIN_PSUBW128,
+ IX86_BUILTIN_PSUBD128,
+ IX86_BUILTIN_PSUBQ128,
+ IX86_BUILTIN_PSUBSB128,
+ IX86_BUILTIN_PSUBSW128,
+ IX86_BUILTIN_PSUBUSB128,
+ IX86_BUILTIN_PSUBUSW128,
+
+ IX86_BUILTIN_PAND128,
+ IX86_BUILTIN_PANDN128,
+ IX86_BUILTIN_POR128,
+ IX86_BUILTIN_PXOR128,
+
+ IX86_BUILTIN_PAVGB128,
+ IX86_BUILTIN_PAVGW128,
+
+ IX86_BUILTIN_PCMPEQB128,
+ IX86_BUILTIN_PCMPEQW128,
+ IX86_BUILTIN_PCMPEQD128,
+ IX86_BUILTIN_PCMPGTB128,
+ IX86_BUILTIN_PCMPGTW128,
+ IX86_BUILTIN_PCMPGTD128,
+
+ IX86_BUILTIN_PMADDWD128,
+
+ IX86_BUILTIN_PMAXSW128,
+ IX86_BUILTIN_PMAXUB128,
+ IX86_BUILTIN_PMINSW128,
+ IX86_BUILTIN_PMINUB128,
+
+ IX86_BUILTIN_PMULUDQ,
+ IX86_BUILTIN_PMULUDQ128,
+ IX86_BUILTIN_PMULHUW128,
+ IX86_BUILTIN_PMULHW128,
+ IX86_BUILTIN_PMULLW128,
+
+ IX86_BUILTIN_PSADBW128,
+ IX86_BUILTIN_PSHUFHW,
+ IX86_BUILTIN_PSHUFLW,
+ IX86_BUILTIN_PSHUFD,
+
+ IX86_BUILTIN_PSLLW128,
+ IX86_BUILTIN_PSLLD128,
+ IX86_BUILTIN_PSLLQ128,
+ IX86_BUILTIN_PSRAW128,
+ IX86_BUILTIN_PSRAD128,
+ IX86_BUILTIN_PSRLW128,
+ IX86_BUILTIN_PSRLD128,
+ IX86_BUILTIN_PSRLQ128,
+ IX86_BUILTIN_PSLLDQI128,
+ /* APPLE LOCAL 591583 */
+ IX86_BUILTIN_PSLLDQI128_BYTESHIFT,
+ IX86_BUILTIN_PSLLWI128,
+ IX86_BUILTIN_PSLLDI128,
+ IX86_BUILTIN_PSLLQI128,
+ IX86_BUILTIN_PSRAWI128,
+ IX86_BUILTIN_PSRADI128,
+ IX86_BUILTIN_PSRLDQI128,
+ /* APPLE LOCAL 591583 */
+ IX86_BUILTIN_PSRLDQI128_BYTESHIFT,
+ IX86_BUILTIN_PSRLWI128,
+ IX86_BUILTIN_PSRLDI128,
+ IX86_BUILTIN_PSRLQI128,
+
+ IX86_BUILTIN_PUNPCKHBW128,
+ IX86_BUILTIN_PUNPCKHWD128,
+ IX86_BUILTIN_PUNPCKHDQ128,
+ IX86_BUILTIN_PUNPCKHQDQ128,
+ IX86_BUILTIN_PUNPCKLBW128,
+ IX86_BUILTIN_PUNPCKLWD128,
+ IX86_BUILTIN_PUNPCKLDQ128,
+ IX86_BUILTIN_PUNPCKLQDQ128,
+
+ IX86_BUILTIN_CLFLUSH,
+ IX86_BUILTIN_MFENCE,
+ IX86_BUILTIN_LFENCE,
+
+ /* Prescott New Instructions. */
+ IX86_BUILTIN_ADDSUBPS,
+ IX86_BUILTIN_HADDPS,
+ IX86_BUILTIN_HSUBPS,
+ IX86_BUILTIN_MOVSHDUP,
+ IX86_BUILTIN_MOVSLDUP,
+ IX86_BUILTIN_ADDSUBPD,
+ IX86_BUILTIN_HADDPD,
+ IX86_BUILTIN_HSUBPD,
+ IX86_BUILTIN_LDDQU,
+
+ IX86_BUILTIN_MONITOR,
+ IX86_BUILTIN_MWAIT,
+ /* APPLE LOCAL begin mainline */
+ /* Merom New Instructions. */
+ IX86_BUILTIN_PHADDW,
+ IX86_BUILTIN_PHADDD,
+ IX86_BUILTIN_PHADDSW,
+ IX86_BUILTIN_PHSUBW,
+ IX86_BUILTIN_PHSUBD,
+ IX86_BUILTIN_PHSUBSW,
+ IX86_BUILTIN_PMADDUBSW,
+ IX86_BUILTIN_PMULHRSW,
+ IX86_BUILTIN_PSHUFB,
+ IX86_BUILTIN_PSIGNB,
+ IX86_BUILTIN_PSIGNW,
+ IX86_BUILTIN_PSIGND,
+ IX86_BUILTIN_PALIGNR,
+ IX86_BUILTIN_PABSB,
+ IX86_BUILTIN_PABSW,
+ IX86_BUILTIN_PABSD,
+
+ IX86_BUILTIN_PHADDW128,
+ IX86_BUILTIN_PHADDD128,
+ IX86_BUILTIN_PHADDSW128,
+ IX86_BUILTIN_PHSUBW128,
+ IX86_BUILTIN_PHSUBD128,
+ IX86_BUILTIN_PHSUBSW128,
+ IX86_BUILTIN_PMADDUBSW128,
+ IX86_BUILTIN_PMULHRSW128,
+ IX86_BUILTIN_PSHUFB128,
+ IX86_BUILTIN_PSIGNB128,
+ IX86_BUILTIN_PSIGNW128,
+ IX86_BUILTIN_PSIGND128,
+ IX86_BUILTIN_PALIGNR128,
+ IX86_BUILTIN_PABSB128,
+ IX86_BUILTIN_PABSW128,
+ IX86_BUILTIN_PABSD128,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* AMDFAM10 - SSE4A New Instructions. */
+ IX86_BUILTIN_MOVNTSD,
+ IX86_BUILTIN_MOVNTSS,
+ IX86_BUILTIN_EXTRQI,
+ IX86_BUILTIN_EXTRQ,
+ IX86_BUILTIN_INSERTQI,
+ IX86_BUILTIN_INSERTQ,
+
+ /* SSE4.1. */
+ IX86_BUILTIN_BLENDPD,
+ IX86_BUILTIN_BLENDPS,
+ IX86_BUILTIN_BLENDVPD,
+ IX86_BUILTIN_BLENDVPS,
+ IX86_BUILTIN_PBLENDVB128,
+ IX86_BUILTIN_PBLENDW128,
+
+ IX86_BUILTIN_DPPD,
+ IX86_BUILTIN_DPPS,
+
+ IX86_BUILTIN_INSERTPS128,
+
+ IX86_BUILTIN_MOVNTDQA,
+ IX86_BUILTIN_MPSADBW128,
+ IX86_BUILTIN_PACKUSDW128,
+ IX86_BUILTIN_PCMPEQQ,
+ IX86_BUILTIN_PHMINPOSUW128,
+
+ IX86_BUILTIN_PMAXSB128,
+ IX86_BUILTIN_PMAXSD128,
+ IX86_BUILTIN_PMAXUD128,
+ IX86_BUILTIN_PMAXUW128,
+
+ IX86_BUILTIN_PMINSB128,
+ IX86_BUILTIN_PMINSD128,
+ IX86_BUILTIN_PMINUD128,
+ IX86_BUILTIN_PMINUW128,
+
+ IX86_BUILTIN_PMOVSXBW128,
+ IX86_BUILTIN_PMOVSXBD128,
+ IX86_BUILTIN_PMOVSXBQ128,
+ IX86_BUILTIN_PMOVSXWD128,
+ IX86_BUILTIN_PMOVSXWQ128,
+ IX86_BUILTIN_PMOVSXDQ128,
+
+ IX86_BUILTIN_PMOVZXBW128,
+ IX86_BUILTIN_PMOVZXBD128,
+ IX86_BUILTIN_PMOVZXBQ128,
+ IX86_BUILTIN_PMOVZXWD128,
+ IX86_BUILTIN_PMOVZXWQ128,
+ IX86_BUILTIN_PMOVZXDQ128,
+
+ IX86_BUILTIN_PMULDQ128,
+ IX86_BUILTIN_PMULLD128,
+
+ IX86_BUILTIN_ROUNDPD,
+ IX86_BUILTIN_ROUNDPS,
+ IX86_BUILTIN_ROUNDSD,
+ IX86_BUILTIN_ROUNDSS,
+
+ IX86_BUILTIN_PTESTZ,
+ IX86_BUILTIN_PTESTC,
+ IX86_BUILTIN_PTESTNZC,
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* APPLE LOCAL end mainline */
+ IX86_BUILTIN_VEC_INIT_V2SI,
+ IX86_BUILTIN_VEC_INIT_V4HI,
+ IX86_BUILTIN_VEC_INIT_V8QI,
+ IX86_BUILTIN_VEC_EXT_V2DF,
+ IX86_BUILTIN_VEC_EXT_V2DI,
+ IX86_BUILTIN_VEC_EXT_V4SF,
+ IX86_BUILTIN_VEC_EXT_V4SI,
+ IX86_BUILTIN_VEC_EXT_V8HI,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* deletion */
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_EXT_V2SI,
+ IX86_BUILTIN_VEC_EXT_V4HI,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_EXT_V16QI,
+ IX86_BUILTIN_VEC_SET_V2DI,
+ IX86_BUILTIN_VEC_SET_V4SF,
+ IX86_BUILTIN_VEC_SET_V4SI,
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_SET_V8HI,
+ IX86_BUILTIN_VEC_SET_V4HI,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_SET_V16QI,
+
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
+ /* SSE4.2. */
+ IX86_BUILTIN_CRC32QI,
+ IX86_BUILTIN_CRC32HI,
+ IX86_BUILTIN_CRC32SI,
+ IX86_BUILTIN_CRC32DI,
+
+ IX86_BUILTIN_PCMPESTRI128,
+ IX86_BUILTIN_PCMPESTRM128,
+ IX86_BUILTIN_PCMPESTRA128,
+ IX86_BUILTIN_PCMPESTRC128,
+ IX86_BUILTIN_PCMPESTRO128,
+ IX86_BUILTIN_PCMPESTRS128,
+ IX86_BUILTIN_PCMPESTRZ128,
+ IX86_BUILTIN_PCMPISTRI128,
+ IX86_BUILTIN_PCMPISTRM128,
+ IX86_BUILTIN_PCMPISTRA128,
+ IX86_BUILTIN_PCMPISTRC128,
+ IX86_BUILTIN_PCMPISTRO128,
+ IX86_BUILTIN_PCMPISTRS128,
+ IX86_BUILTIN_PCMPISTRZ128,
+
+ IX86_BUILTIN_PCMPGTQ,
+
+ /* TFmode support builtins. */
+ IX86_BUILTIN_INFQ,
+ IX86_BUILTIN_FABSQ,
+ IX86_BUILTIN_COPYSIGNQ,
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ IX86_BUILTIN_MAX
+};
+
+#define def_builtin(MASK, NAME, TYPE, CODE) \
+do { \
+ if ((MASK) & target_flags \
+ && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
+ lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
+ NULL, NULL_TREE); \
+} while (0)
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Like def_builtin, but also marks the function decl "const". */
+
+static inline tree
+def_builtin_const (int mask, const char *name, tree type,
+ enum ix86_builtins code)
+{
+ tree decl = NULL_TREE;
+ if ((mask) & target_flags
+ && (!((mask) & MASK_64BIT) || TARGET_64BIT))
+ decl = lang_hooks.builtin_function (name, type, code, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
+ if (decl)
+ TREE_READONLY (decl) = 1;
+ return decl;
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* Bits for builtin_description.flag. */
+
+/* Set when we don't support the comparison natively, and should swap
+   the comparison operands in order to support it.  */
+#define BUILTIN_DESC_SWAP_OPERANDS 1
+
+struct builtin_description
+{
+ const unsigned int mask;
+ const enum insn_code icode;
+ const char *const name;
+ const enum ix86_builtins code;
+ const enum rtx_code comparison;
+ const unsigned int flag;
+};
+
+/* APPLE LOCAL begin 4299257 */
+static const struct builtin_description bdesc_comi[] =
+{
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
+};
+static const struct builtin_description bdesc_ucomi[] =
+{
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
+};
+/* APPLE LOCAL end 4299257 */
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+static const struct builtin_description bdesc_ptest[] =
+{
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
+};
+
+static const struct builtin_description bdesc_pcmpestr[] =
+{
+ /* SSE4.2 */
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_pcmpistr[] =
+{
+ /* SSE4.2 */
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_crc32[] =
+{
+ /* SSE4.2 */
+ { MASK_SSE4_2 | MASK_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
+};
+
+/* SSE builtins that take 3 arguments, where the last argument must be an
+   immediate or xmm0.  */
+static const struct builtin_description bdesc_sse_3arg[] =
+{
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
+};
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+static const struct builtin_description bdesc_2arg[] =
+{
+ /* SSE */
+ { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
+
+ { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
+
+ /* MMX */
+ { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ { MASK_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
+
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
+
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
+
+ /* Special. */
+ { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+ { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv4hi2si, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv2si2si, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv1di3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv1di2si, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv4hi2si, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv2si2si, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv1di3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv1di2si, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashrv4hi2si, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashrv2si2si, 0, IX86_BUILTIN_PSRADI, 0, 0 },
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
+
+ /* SSE2 */
+ { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
+
+ { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
+
+ /* SSE2 MMX */
+ { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
+ { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
+
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+ { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
+
+ /* SSE3 MMX */
+ { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
+ /* APPLE LOCAL begin mainline */
+ { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
+
+ /* SSSE3 MMX */
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
+
+ /* SSE4.2 */
+ { MASK_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+};
+
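+/* One-operand builtins.  Entries whose name field is zero are skipped by
+   the generic registration loop in ix86_init_mmx_sse_builtins and are
+   instead given explicit prototypes via def_builtin further below.  */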
+static const struct builtin_description bdesc_1arg[] =
+{
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+ { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+ { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+ { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+ { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
+
+ /* SSE3 */
+ { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
+ /* APPLE LOCAL begin mainline */
+
+ /* SSSE3 */
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
+
+ /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+};
+
+static void
+ix86_init_builtins (void)
+{
+ if (TARGET_MMX)
+ ix86_init_mmx_sse_builtins ();
+
+ /* APPLE LOCAL begin constant cfstrings */
+#ifdef SUBTARGET_INIT_BUILTINS
+ SUBTARGET_INIT_BUILTINS;
+#endif
+ /* APPLE LOCAL end constant cfstrings */
+}
+
+/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
+ is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
+ builtins. */
+static void
+ix86_init_mmx_sse_builtins (void)
+{
+ const struct builtin_description * d;
+ size_t i;
+
+ tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
+ tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
+ tree V2DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
+ tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
+ tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
+ tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
+ tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+ tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
+ tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ tree V1DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
+
+ tree pchar_type_node = build_pointer_type (char_type_node);
+ tree pcchar_type_node = build_pointer_type (
+ build_type_variant (char_type_node, 1, 0));
+ tree pfloat_type_node = build_pointer_type (float_type_node);
+ tree pcfloat_type_node = build_pointer_type (
+ build_type_variant (float_type_node, 1, 0));
+ tree pv2si_type_node = build_pointer_type (V2SI_type_node);
+ tree pv2di_type_node = build_pointer_type (V2DI_type_node);
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ tree pv1di_type_node = build_pointer_type (V1DI_type_node);
+
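+  /* Function type (prototype) nodes shared by the def_builtin calls below.
+     The names encode return and argument types, e.g. v4sf_ftype_v4sf_int
+     is "V4SF (*) (V4SF, int)".  */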
+ /* Comparisons. */
+ tree int_ftype_v4sf_v4sf
+ = build_function_type_list (integer_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v4si_ftype_v4sf_v4sf
+ = build_function_type_list (V4SI_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ /* MMX/SSE/integer conversions. */
+ tree int_ftype_v4sf
+ = build_function_type_list (integer_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int64_ftype_v4sf
+ = build_function_type_list (long_long_integer_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int_ftype_v8qi
+ = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, integer_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int64
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, long_long_integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v2si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V2SI_type_node, NULL_TREE);
+
+ /* Miscellaneous. */
+ tree v8qi_ftype_v4hi_v4hi
+ = build_function_type_list (V8QI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v4hi_ftype_v2si_v2si
+ = build_function_type_list (V4HI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ tree v2si_ftype_v4hi_v4hi
+ = build_function_type_list (V2SI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_int
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree v4hi_ftype_v4hi_v1di
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, V1DI_type_node,
+ NULL_TREE);
+ tree v2si_ftype_v2si_int
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, integer_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_v1di
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, V1DI_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ tree void_ftype_void
+ = build_function_type (void_type_node, void_list_node);
+ tree void_ftype_unsigned
+ = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
+ tree void_ftype_unsigned_unsigned
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree void_ftype_pcvoid_unsigned_unsigned
+ = build_function_type_list (void_type_node, const_ptr_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_void
+ = build_function_type (unsigned_type_node, void_list_node);
+ tree v2si_ftype_v4sf
+ = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
+ /* Loads/stores. */
+ tree void_ftype_v8qi_v8qi_pchar
+ = build_function_type_list (void_type_node,
+ V8QI_type_node, V8QI_type_node,
+ pchar_type_node, NULL_TREE);
+ tree v4sf_ftype_pcfloat
+ = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
+ /* @@@ the type is bogus */
+ tree v4sf_ftype_v4sf_pv2si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, pv2si_type_node, NULL_TREE);
+ tree void_ftype_pv2si_v4sf
+ = build_function_type_list (void_type_node,
+ pv2si_type_node, V4SF_type_node, NULL_TREE);
+ tree void_ftype_pfloat_v4sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V4SF_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree void_ftype_pv1di_v1di
+ = build_function_type_list (void_type_node,
+ pv1di_type_node, V1DI_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ tree void_ftype_pv2di_v2di
+ = build_function_type_list (void_type_node,
+ pv2di_type_node, V2DI_type_node, NULL_TREE);
+ /* Normal vector unops. */
+ tree v4sf_ftype_v4sf
+ = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ /* APPLE LOCAL begin mainline */
+ tree v16qi_ftype_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi
+ = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si
+ = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi
+ = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi
+ = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
+ /* APPLE LOCAL end mainline */
+
+ /* Normal vector binops. */
+ tree v4sf_ftype_v4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi_v8qi
+ = build_function_type_list (V8QI_type_node,
+ V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_v4hi
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_v2si
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree v1di_ftype_v1di_v1di
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4684674 */
+ tree v1di_ftype_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4684674 */
+ /* APPLE LOCAL begin 4656532 */
+ tree v1di_ftype_v1di_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node,
+ V1DI_type_node,
+ integer_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 */
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ tree v2si_ftype_v2sf
+ = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
+ tree v2sf_ftype_v2si
+ = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si
+ = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v2sf_ftype_v2sf
+ = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree v2sf_ftype_v2sf_v2sf
+ = build_function_type_list (V2SF_type_node,
+ V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree v2si_ftype_v2sf_v2sf
+ = build_function_type_list (V2SI_type_node,
+ V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree pint_type_node = build_pointer_type (integer_type_node);
+ tree pdouble_type_node = build_pointer_type (double_type_node);
+ tree pcdouble_type_node = build_pointer_type (
+ build_type_variant (double_type_node, 1, 0));
+ tree int_ftype_v2df_v2df
+ = build_function_type_list (integer_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+
+ tree void_ftype_pcvoid
+ = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
+ tree v4sf_ftype_v4si
+ = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v4si_ftype_v4sf
+ = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v4si
+ = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
+ tree v4si_ftype_v2df
+ = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v2si_ftype_v2df
+ = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4sf_ftype_v2df
+ = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2si
+ = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
+ tree v2df_ftype_v4sf
+ = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
+ tree int_ftype_v2df
+ = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+ tree int64_ftype_v2df
+ = build_function_type_list (long_long_integer_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, integer_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int64
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, long_long_integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v2df
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v4sf
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_pcdouble
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, pcdouble_type_node, NULL_TREE);
+ tree void_ftype_pdouble_v2df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V2DF_type_node, NULL_TREE);
+ tree void_ftype_pint_int
+ = build_function_type_list (void_type_node,
+ pint_type_node, integer_type_node, NULL_TREE);
+ tree void_ftype_v16qi_v16qi_pchar
+ = build_function_type_list (void_type_node,
+ V16QI_type_node, V16QI_type_node,
+ pchar_type_node, NULL_TREE);
+ tree v2df_ftype_pcdouble
+ = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node, V2DI_type_node, NULL_TREE);
+ tree v2di_ftype_v2df_v2df
+ = build_function_type_list (V2DI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_int
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL begin mainline */
+ tree v2di_ftype_v2di_v2di_int
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL end mainline */
+ tree v4si_ftype_v4si_int
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node, integer_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi_int
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node, integer_type_node, NULL_TREE);
+ tree v4si_ftype_v8hi_v8hi
+ = build_function_type_list (V4SI_type_node,
+ V8HI_type_node, V8HI_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree v1di_ftype_v8qi_v8qi
+ = build_function_type_list (V1DI_type_node,
+ V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v1di_ftype_v2si_v2si
+ = build_function_type_list (V1DI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ tree v2di_ftype_v16qi_v16qi
+ = build_function_type_list (V2DI_type_node,
+ V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v2di_ftype_v4si_v4si
+ = build_function_type_list (V2DI_type_node,
+ V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree int_ftype_v16qi
+ = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
+ tree v16qi_ftype_pcchar
+ = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
+ tree void_ftype_pchar_v16qi
+ = build_function_type_list (void_type_node,
+ pchar_type_node, V16QI_type_node, NULL_TREE);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ tree v2di_ftype_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v2di_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v2di_v16qi
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree v8hi_ftype_v16qi
+ = build_function_type_list (V8HI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v16qi
+ = build_function_type_list (V4SI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v16qi
+ = build_function_type_list (V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8hi
+ = build_function_type_list (V4SI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v8hi
+ = build_function_type_list (V2DI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v4si
+ = build_function_type_list (V2DI_type_node, V4SI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_pv2di
+ = build_function_type_list (V2DI_type_node, pv2di_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi_int
+ = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ V16QI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ V16QI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v8hi_v8hi_int
+ = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ V8HI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v4si_v4si_int
+ = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ V4SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v2di_v2di
+ = build_function_type_list (integer_type_node,
+ V2DI_type_node, V2DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_v16qi_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ tree float80_type;
+ tree float128_type;
+ tree ftype;
+
+ /* The __float80 type. */
+ if (TYPE_MODE (long_double_type_node) == XFmode)
+ (*lang_hooks.types.register_builtin_type) (long_double_type_node,
+ "__float80");
+ else
+ {
+ /* The __float80 type. */
+ float80_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (float80_type) = 80;
+ layout_type (float80_type);
+ (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
+ }
+
+ if (TARGET_64BIT)
+ {
+ float128_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (float128_type) = 128;
+ layout_type (float128_type);
+ (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
+ }
+
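+  /* The registration loops below walk the bdesc_* tables.  def_builtin
+     creates a builtin only when the ISA bits in d->mask are enabled for
+     the target (entries tagged MASK_64BIT additionally require a 64-bit
+     target), so each builtin is exposed only under the matching -m options.  */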
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Add all SSE builtins that are more or less simple operations on
+ three operands. */
+ for (i = 0, d = bdesc_sse_3arg;
+ i < ARRAY_SIZE (bdesc_sse_3arg);
+ i++, d++)
+ {
+ /* Use one of the operands; the target can have a different mode for
+ mask-generating compares. */
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi_v16qi_int;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi_v8hi_int;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si_v4si_int;
+ break;
+ case V2DImode:
+ type = v2di_ftype_v2di_v2di_int;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df_v2df_int;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf_v4sf_int;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Override for variable blends. */
+ switch (d->icode)
+ {
+ case CODE_FOR_sse4_1_blendvpd:
+ type = v2df_ftype_v2df_v2df_v2df;
+ break;
+ case CODE_FOR_sse4_1_blendvps:
+ type = v4sf_ftype_v4sf_v4sf_v4sf;
+ break;
+ case CODE_FOR_sse4_1_pblendvb:
+ type = v16qi_ftype_v16qi_v16qi_v16qi;
+ break;
+ default:
+ break;
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ /* Add all builtins that are more or less simple operations on two
+ operands. */
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ {
+ /* Use one of the operands; the target can have a different mode for
+ mask-generating compares. */
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi_v16qi;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi_v8hi;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si_v4si;
+ break;
+ case V2DImode:
+ type = v2di_ftype_v2di_v2di;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df_v2df;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf_v4sf;
+ break;
+ case V8QImode:
+ type = v8qi_ftype_v8qi_v8qi;
+ break;
+ case V4HImode:
+ type = v4hi_ftype_v4hi_v4hi;
+ break;
+ case V2SImode:
+ type = v2si_ftype_v2si_v2si;
+ break;
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ case V1DImode:
+ type = v1di_ftype_v1di_v1di;
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Override for comparisons. */
+ if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+ || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
+ type = v4si_ftype_v4sf_v4sf;
+
+ if (d->icode == CODE_FOR_sse2_maskcmpv2df3
+ || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
+ type = v2di_ftype_v2df_v2df;
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
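+  /* These builtins are not normally called directly; the <*mmintrin.h>
+     headers in this directory wrap them, e.g. _mm_add_ps in xmmintrin.h
+     expands to __builtin_ia32_addps.  */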
+ /* APPLE LOCAL begin mainline */
+ /* Add all builtins that are more or less simple operations on 1 operand. */
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ {
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf;
+ break;
+ case V8QImode:
+ type = v8qi_ftype_v8qi;
+ break;
+ case V4HImode:
+ type = v4hi_ftype_v4hi;
+ break;
+ case V2SImode:
+ type = v2si_ftype_v2si;
+ break;
+
+ default:
+ abort ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* pcmpestr[im] insns. */
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
+ else
+ ftype = int_ftype_v16qi_int_v16qi_int_int;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
+
+ /* pcmpistr[im] insns. */
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ ftype = v16qi_ftype_v16qi_v16qi_int;
+ else
+ ftype = int_ftype_v16qi_v16qi_int;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
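+  /* PCMPESTRM/PCMPISTRM return their result mask in an XMM register, hence
+     the v16qi prototypes above; the index and flag forms return an int.  */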
+ /* Add the remaining MMX insns with somewhat more complicated types. */
+ def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSLLW);
+ def_builtin (MASK_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
+ def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSLLD);
+ def_builtin (MASK_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
+ def_builtin (MASK_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
+
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRLW);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRLD);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
+
+ def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRAW);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRAD);
+ def_builtin (MASK_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
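+  /* The MMX shift builtins need these hand-written prototypes because the
+     shift count is either a __m64 value (V1DImode, per the APPLE LOCAL
+     change above) or an immediate int for the *i forms.  */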
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
+ def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
+
+ /* APPLE LOCAL 4299257 */
+ /* comi insns. */
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->mask == MASK_SSE2)
+ def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
+ else
+ def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
+
+ /* APPLE LOCAL begin 4299257 */
+ /* ucomi insns. */
+ for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
+ if (d->mask == MASK_SSE2)
+ def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
+ else
+ def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
+ /* APPLE LOCAL end 4299257 */
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* ptest insns. */
+ for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
+ def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
+ def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
+ def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+ def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+ def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+ def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+ def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pv1di_v1di, IX86_BUILTIN_MOVNTQ);
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
+ def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
+ def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
+
+ /* Original 3DNow! */
+ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
+
+ /* 3DNow! extension as used in the Athlon CPU. */
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
+
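+  /* Remaining SSE2 builtins defined individually: loads and stores,
+     conversions, shuffles and shifts taking immediates, and the
+     cacheability/fence operations.  */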
+ /* SSE2 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
+ def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+ def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+ def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+ def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
+ def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
+ def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
+
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
+ /* APPLE LOCAL 5919583 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128_BYTESHIFT);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
+ /* APPLE LOCAL 5919583 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128_BYTESHIFT);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
+
+ /* Prescott New Instructions. */
+ def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
+ void_ftype_pcvoid_unsigned_unsigned,
+ IX86_BUILTIN_MONITOR);
+ def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
+ void_ftype_unsigned_unsigned,
+ IX86_BUILTIN_MWAIT);
+ def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
+ v4sf_ftype_v4sf,
+ IX86_BUILTIN_MOVSHDUP);
+ def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
+ v4sf_ftype_v4sf,
+ IX86_BUILTIN_MOVSLDUP);
+ def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
+ v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
+
+ /* APPLE LOCAL begin 4099020 */
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_movqv4si", ftype, IX86_BUILTIN_MOVQ);
+ ftype = build_function_type_list (V4SI_type_node, pv2si_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_loadlv4si", ftype, IX86_BUILTIN_LOADQ);
+ ftype = build_function_type_list (void_type_node, pv2si_type_node, V4SI_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_storelv4si", ftype, IX86_BUILTIN_STOREQ);
+ /* APPLE LOCAL end 4099020 */
+
+ /* APPLE LOCAL begin 4656532 */
+ /* Merom New Instructions. */
+ def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
+ v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
+ def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", v1di_ftype_v1di_v1di_int,
+ IX86_BUILTIN_PALIGNR);
+
+ /* APPLE LOCAL end 4656532 */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* SSE4.1. */
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
+
+ /* SSE4.2. */
+ ftype = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_char_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
+ ftype = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
+ ftype = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
+ ftype = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
+
+  /* AMDFAM10 SSE4A New built-ins.  */
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* Access to the vec_init patterns. */
+ ftype = build_function_type_list (V2SI_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
+ ftype, IX86_BUILTIN_VEC_INIT_V2SI);
+
+ ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
+ ftype, IX86_BUILTIN_VEC_INIT_V4HI);
+
+ ftype = build_function_type_list (V8QI_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
+ ftype, IX86_BUILTIN_VEC_INIT_V8QI);
+
+ /* Access to the vec_extract patterns. */
+ ftype = build_function_type_list (double_type_node, V2DF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
+ ftype, IX86_BUILTIN_VEC_EXT_V2DF);
+
+ ftype = build_function_type_list (long_long_integer_type_node,
+ V2DI_type_node, integer_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
+ ftype, IX86_BUILTIN_VEC_EXT_V2DI);
+
+ ftype = build_function_type_list (float_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
+ ftype, IX86_BUILTIN_VEC_EXT_V4SF);
+
+ ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
+ ftype, IX86_BUILTIN_VEC_EXT_V4SI);
+
+ /* APPLE LOCAL begin radar 4469713 */
+ /* The return type of the builtin function should be an unsigned instead
+ of a signed type. */
+ ftype = build_function_type_list (unsigned_intHI_type_node, V8HI_type_node,
+ /* APPLE LOCAL end radar 4469713 */
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
+ ftype, IX86_BUILTIN_VEC_EXT_V8HI);
+
+ ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
+ ftype, IX86_BUILTIN_VEC_EXT_V4HI);
+
+ ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
+ ftype, IX86_BUILTIN_VEC_EXT_V2SI);
+
+ ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Access to the vec_set patterns. */
+ ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ intDI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
+
+ ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
+ float_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
+
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ intSI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
+ ftype, IX86_BUILTIN_VEC_SET_V8HI);
+
+ ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
+ ftype, IX86_BUILTIN_VEC_SET_V4HI);
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ intQI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+}
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+ where we expect a vector. To avoid crashing, use one of the vector
+ clear instructions. */
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+ if (x == const0_rtx)
+ x = CONST0_RTX (mode);
+ return x;
+}
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Subroutine of ix86_expand_builtin to take care of SSE insns with
+   4 operands.  The third argument must be either a constant that fits
+   in 8 bits or the xmm0 register.  */
+
+static rtx
+ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+ enum machine_mode mode3 = insn_data[icode].operand[3].mode;
+
+ if (VECTOR_MODE_P (mode1))
+ op0 = safe_vector_operand (op0, mode1);
+ if (VECTOR_MODE_P (mode2))
+ op1 = safe_vector_operand (op1, mode2);
+ if (VECTOR_MODE_P (mode3))
+ op2 = safe_vector_operand (op2, mode3);
+
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if ((optimize && !register_operand (op1, mode2))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_blendvpd:
+ case CODE_FOR_sse4_1_blendvps:
+ case CODE_FOR_sse4_1_pblendvb:
+ op2 = copy_to_mode_reg (mode3, op2);
+ break;
+
+ case CODE_FOR_sse4_1_roundsd:
+ case CODE_FOR_sse4_1_roundss:
+ error ("the third argument must be a 4-bit immediate");
+ return const0_rtx;
+
+ default:
+ error ("the third argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
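
For reference, the 4-operand path above is normally reached through an inline wrapper in smmintrin.h.  A minimal sketch, assuming the mainline-style definition of _mm_blendv_pd, whose third operand is the xmm0 mask register rather than an 8-bit immediate:

static __inline __m128d __attribute__((__always_inline__))
_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
{
  /* __builtin_ia32_blendvpd lands in the CODE_FOR_sse4_1_blendvpd case
     above, so the mask is copied into a register instead of being
     checked against the 8-bit immediate predicate.  */
  return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X, (__v2df)__Y,
					    (__v2df)__M);
}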
+
+/* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
+
+static rtx
+ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (optimize
+ || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
+ }
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
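
The narrowing step above (copy_to_reg plus simplify_gen_subreg on the second operand) exists because the crc32 builtins registered earlier take full unsigned char/short/int arguments while the insn wants a QImode/HImode/SImode operand.  A minimal sketch of the kind of wrapper smmintrin.h provides, assuming the mainline-style definition:

static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
  /* __builtin_ia32_crc32qi was registered above with
     unsigned_type_node / unsigned_char_type_node argument types.  */
  return __builtin_ia32_crc32qi (__C, __V);
}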
+
+/* Subroutine of ix86_expand_builtin to take care of binop insns. */
+
+static rtx
+ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
+{
+ rtx pat, xops[3];
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (GET_MODE (op1) == SImode && mode1 == TImode)
+ {
+ rtx x = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadd (x, op1));
+ op1 = gen_lowpart (TImode, x);
+ }
+
+ /* The insn must want input operands in the same modes as the
+ result. */
+ gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
+ && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
+
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ /* ??? Using ix86_fixup_binary_operands is problematic when
+ we've got mismatched modes. Fake it. */
+
+ xops[0] = target;
+ xops[1] = op0;
+ xops[2] = op1;
+
+ if (tmode == mode0 && tmode == mode1)
+ {
+ target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
+ op0 = xops[1];
+ op1 = xops[2];
+ }
+ else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
+ {
+ op0 = force_reg (mode0, op0);
+ op1 = force_reg (mode1, op1);
+ target = gen_reg_rtx (tmode);
+ }
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
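
Most two-operand vector builtins come through this routine via the bdesc_2arg table.  A minimal sketch of a typical emmintrin.h wrapper, assuming the mainline-style definition:

static __inline __m128d __attribute__((__always_inline__))
_mm_add_pd (__m128d __A, __m128d __B)
{
  /* __builtin_ia32_addpd is a plain binop; both operands and the
     result share V2DFmode, so the ix86_fixup_binary_operands path
     above is taken.  */
  return (__m128d) __builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}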
+
+/* Subroutine of ix86_expand_builtin to take care of stores. */
+
+static rtx
+ix86_expand_store_builtin (enum insn_code icode, tree arglist)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (op0, op1);
+ if (pat)
+ emit_insn (pat);
+ return 0;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of unop insns. */
+
+static rtx
+ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
+ rtx target, int do_load)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ rtx op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ if (do_load)
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ else
+ {
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_roundpd:
+ case CODE_FOR_sse4_1_roundps:
+ {
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ {
+ error ("the second argument must be a 4-bit immediate");
+ return const0_rtx;
+ }
+ pat = GEN_FCN (icode) (target, op0, op1);
+ }
+ break;
+ default:
+ pat = GEN_FCN (icode) (target, op0);
+ break;
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
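
The roundpd/roundps special case above peels off a second argument that must be a 4-bit rounding-control immediate.  A minimal sketch, assuming the mainline-style smmintrin.h inline (that header also offers a macro form so the immediate stays visible without inlining):

static __inline __m128d __attribute__((__always_inline__))
_mm_round_pd (__m128d __V, const int __M)
{
  /* __M must fold to a 4-bit constant, or the expander above reports
     "the second argument must be a 4-bit immediate".  */
  return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
}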
+
+/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
+ sqrtss, rsqrtss, rcpss. */
+
+static rtx
+ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ rtx op1, op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ op1 = op0;
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
+ op1 = copy_to_mode_reg (mode0, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
+
+static rtx
+ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = gen_reg_rtx (mode1);
+ emit_move_insn (tmp, op1);
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
+ pat = GEN_FCN (d->icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comi insns. */
+
+static rtx
+ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* APPLE LOCAL begin 4299257 */
+/* Subroutine of ix86_expand_builtin to take care of ucomi insns. */
+
+static rtx
+ix86_expand_sse_ucomi (const struct builtin_description *d, tree arglist,
+ rtx target)
+{
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode scalar_mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ gcc_assert (mode0 == V4SFmode || mode0 == V2DFmode);
+ gcc_assert (mode1 == V4SFmode || mode1 == V2DFmode);
+
+ scalar_mode = (mode0 == V4SFmode) ? SFmode : DFmode;
+ op0 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode0, op0), 0);
+ op1 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode1, op1), 0);
+
+ ix86_compare_op0 = op0;
+ ix86_compare_op1 = op1;
+ if (ix86_expand_setcc (comparison, target))
+ return SUBREG_REG (target);
+
+ return NULL_RTX;
+}
+/* APPLE LOCAL end 4299257 */
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
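
The ptest builtins return their result through the flags register, which is why the code above materializes an SImode zero and sets its low QImode part from the comparison.  A minimal sketch of a caller, assuming the mainline-style smmintrin.h definition:

static __inline int __attribute__((__always_inline__))
_mm_testz_si128 (__m128i __M, __m128i __V)
{
  /* The builtin expands to ptest followed by a flag-to-byte read,
     as constructed above.  */
  return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
}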
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+ tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp))));
+ tree arg4 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp)))));
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ {
+      error ("the fifth argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
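
A minimal sketch of how the explicit-length string-compare builtins are reached, assuming the mainline-style smmintrin.h inline (guarded by __OPTIMIZE__ in that header so the mode argument stays a compile-time constant):

static __inline int __attribute__((__always_inline__))
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  /* d->code == IX86_BUILTIN_PCMPESTRI128 above, so the index result is
     returned and the mask output goes into a scratch register.  */
  return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
				      (__v16qi)__Y, __LY, __M);
}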
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ {
+      error ("the third argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* Return the integer constant in ARG. Constrain it to be in the range
+ of the subparts of VEC_TYPE; issue an error if not. */
+
+static int
+get_element_number (tree vec_type, tree arg)
+{
+ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
+
+ if (!host_integerp (arg, 1)
+ || (elt = tree_low_cst (arg, 1), elt > max))
+ {
+ error ("selector must be an integer constant in the range 0..%wi", max);
+ return 0;
+ }
+
+ return elt;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_init. We DO have language-level syntax for this, in
+ the form of (type){ init-list }. Except that since we can't place emms
+ instructions from inside the compiler, we can't allow the use of MMX
+ registers unless the user explicitly asks for it. So we do *not* define
+ vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
+   we have builtins invoked by mmintrin.h that give us license to emit
+ these sorts of instructions. */
+
+static rtx
+ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
+{
+ enum machine_mode tmode = TYPE_MODE (type);
+ enum machine_mode inner_mode = GET_MODE_INNER (tmode);
+ int i, n_elt = GET_MODE_NUNITS (tmode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
+ {
+ rtx x = expand_normal (TREE_VALUE (arglist));
+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ }
+
+ gcc_assert (arglist == NULL);
+
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
+ return target;
+}
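
As the comment above explains, these builtins give mmintrin.h a way to build MMX vectors without exposing vec_init patterns directly.  A minimal sketch, assuming the mainline-style mmintrin.h definition:

static __inline __m64 __attribute__((__always_inline__))
_mm_set_pi32 (int __i1, int __i0)
{
  /* __builtin_ia32_vec_init_v2si was registered above with two
     integer_type_node arguments; note the element order.  */
  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}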
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
+ had a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_ext_builtin (tree arglist, rtx target)
+{
+ enum machine_mode tmode, mode0;
+ tree arg0, arg1;
+ int elt;
+ rtx op0;
+
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+
+ op0 = expand_normal (arg0);
+ elt = get_element_number (TREE_TYPE (arg0), arg1);
+
+ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ mode0 = TYPE_MODE (TREE_TYPE (arg0));
+ gcc_assert (VECTOR_MODE_P (mode0));
+
+ op0 = force_reg (mode0, op0);
+
+ if (optimize || !target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_extract (true, target, op0, elt);
+
+ return target;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
+ a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_set_builtin (tree arglist)
+{
+ enum machine_mode tmode, mode1;
+ tree arg0, arg1, arg2;
+ int elt;
+ rtx op0, op1, target;
+
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+
+ tmode = TYPE_MODE (TREE_TYPE (arg0));
+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
+ elt = get_element_number (TREE_TYPE (arg0), arg2);
+
+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+ op0 = force_reg (tmode, op0);
+ op1 = force_reg (mode1, op1);
+
+ /* OP0 is the source of these builtin functions and shouldn't be
+ modified. Create a copy, use it and return it as target. */
+ target = gen_reg_rtx (tmode);
+ emit_move_insn (target, op0);
+ ix86_expand_vector_set (true, target, op1, elt);
+
+ return target;
+}
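
A minimal sketch of a wrapper that reaches this routine, assuming the mainline-style emmintrin.h definition:

static __inline __m128i __attribute__((__always_inline__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  /* The source vector is copied into a fresh register above, so __A
     itself is never modified.  */
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}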
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ const struct builtin_description *d;
+ size_t i;
+ enum insn_code icode;
+ tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+ tree arglist = TREE_OPERAND (exp, 1);
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ tree arg0, arg1, arg2, arg3;
+ rtx op0, op1, op2, op3, pat;
+ /* APPLE LOCAL ssse3 */
+ enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case IX86_BUILTIN_EMMS:
+ emit_insn (gen_mmx_emms ());
+ return 0;
+
+ case IX86_BUILTIN_SFENCE:
+ emit_insn (gen_sse_sfence ());
+ return 0;
+
+ case IX86_BUILTIN_MASKMOVQ:
+ case IX86_BUILTIN_MASKMOVDQU:
+ icode = (fcode == IX86_BUILTIN_MASKMOVQ
+ ? CODE_FOR_mmx_maskmovq
+ : CODE_FOR_sse2_maskmovdqu);
+ /* Note the arg order is different from the operand order. */
+ arg1 = TREE_VALUE (arglist);
+ arg2 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ op0 = force_reg (Pmode, op0);
+ op0 = gen_rtx_MEM (mode1, op0);
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ pat = GEN_FCN (icode) (op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return 0;
+
+ case IX86_BUILTIN_SQRTSS:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
+ case IX86_BUILTIN_RSQRTSS:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
+ case IX86_BUILTIN_RCPSS:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
+
+ /* APPLE LOCAL begin 4099020 */
+ case IX86_BUILTIN_LOADQ:
+ return ix86_expand_unop_builtin (CODE_FOR_sse_loadqv4si, arglist, target, 1);
+
+ case IX86_BUILTIN_MOVQ:
+ return ix86_expand_unop_builtin (CODE_FOR_sse_movqv4si, arglist, target, 0);
+
+ case IX86_BUILTIN_STOREQ:
+ return ix86_expand_store_builtin (CODE_FOR_sse_storeqv4si, arglist);
+ /* APPLE LOCAL end 4099020 */
+
+ case IX86_BUILTIN_LOADUPS:
+ return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
+
+ case IX86_BUILTIN_STOREUPS:
+ return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
+
+ case IX86_BUILTIN_LOADHPS:
+ case IX86_BUILTIN_LOADLPS:
+ case IX86_BUILTIN_LOADHPD:
+ case IX86_BUILTIN_LOADLPD:
+ icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
+ : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
+ : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
+ : CODE_FOR_sse2_loadlpd);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ op0 = force_reg (mode0, op0);
+ op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_STOREHPS:
+ case IX86_BUILTIN_STORELPS:
+ icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
+ : CODE_FOR_sse_storelps);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ op1 = force_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return const0_rtx;
+
+ case IX86_BUILTIN_MOVNTPS:
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
+ case IX86_BUILTIN_MOVNTQ:
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntv1di, arglist);
+
+ case IX86_BUILTIN_LDMXCSR:
+ op0 = expand_normal (TREE_VALUE (arglist));
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_move_insn (target, op0);
+ emit_insn (gen_sse_ldmxcsr (target));
+ return 0;
+
+ case IX86_BUILTIN_STMXCSR:
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_insn (gen_sse_stmxcsr (target));
+ return copy_to_mode_reg (SImode, target);
+
+ case IX86_BUILTIN_SHUFPS:
+ case IX86_BUILTIN_SHUFPD:
+ icode = (fcode == IX86_BUILTIN_SHUFPS
+ ? CODE_FOR_sse_shufps
+ : CODE_FOR_sse2_shufpd);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+ {
+ /* @@@ better error message */
+ error ("mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_PSHUFW:
+ case IX86_BUILTIN_PSHUFD:
+ case IX86_BUILTIN_PSHUFHW:
+ case IX86_BUILTIN_PSHUFLW:
+ icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
+ : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
+ : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
+ : CODE_FOR_mmx_pshufw);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ /* @@@ better error message */
+ error ("mask must be an immediate");
+ return const0_rtx;
+ }
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_PSLLWI128:
+ icode = CODE_FOR_ashlv8hi3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSLLDI128:
+ icode = CODE_FOR_ashlv4si3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSLLQI128:
+ icode = CODE_FOR_ashlv2di3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRAWI128:
+ icode = CODE_FOR_ashrv8hi3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRADI128:
+ icode = CODE_FOR_ashrv4si3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRLWI128:
+ icode = CODE_FOR_lshrv8hi3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRLDI128:
+ icode = CODE_FOR_lshrv4si3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRLQI128:
+ icode = CODE_FOR_lshrv2di3;
+ goto do_pshifti;
+ do_pshifti:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ /* APPLE LOCAL begin radar 5543378 mainline candidate */
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
+ op1 = GEN_INT (255);
+ }
+ else
+ {
+ mode2 = insn_data[icode].operand[2].mode;
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ op1 = copy_to_reg (op1);
+ if (GET_MODE (op1) != mode2)
+ op1 = convert_to_mode (mode2, op1, 0);
+ }
+ }
+ /* APPLE LOCAL end radar 5543378 mainline candidate */
+
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_reg (op0);
+
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_PSLLW128:
+ icode = CODE_FOR_ashlv8hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLD128:
+ icode = CODE_FOR_ashlv4si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLQ128:
+ icode = CODE_FOR_ashlv2di3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAW128:
+ icode = CODE_FOR_ashrv8hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAD128:
+ icode = CODE_FOR_ashrv4si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLW128:
+ icode = CODE_FOR_lshrv8hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLD128:
+ icode = CODE_FOR_lshrv4si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLQ128:
+ icode = CODE_FOR_lshrv2di3;
+ goto do_pshift;
+ do_pshift:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_reg (op0);
+
+ op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+ op1 = copy_to_reg (op1);
+
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ /* APPLE LOCAL begin 5919583 */
+ case IX86_BUILTIN_PSLLDQI128:
+ case IX86_BUILTIN_PSRLDQI128:
+ case IX86_BUILTIN_PSLLDQI128_BYTESHIFT:
+ case IX86_BUILTIN_PSRLDQI128_BYTESHIFT:
+ icode = ((fcode == IX86_BUILTIN_PSLLDQI128
+ || fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT)
+ ? CODE_FOR_sse2_ashlti3
+ : CODE_FOR_sse2_lshrti3);
+ /* APPLE LOCAL end 5919583 */
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+      /* APPLE LOCAL begin 5919583 */
+ if (! CONST_INT_P (op1))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ /* The _mm_srli_si128/_mm_slli_si128 primitives are defined with
+ a byte-shift count; inside of GCC, we prefer to specify the
+ width of a shift in bits. The original non-BYTESHIFT
+ primitives were problematic due to the "*8" in their macro
+ bodies; we have moved the "*8" here to resolve this. The
+ original builtins are still supported because many developers
+ rely upon them. */
+ if (fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT
+ || fcode == IX86_BUILTIN_PSRLDQI128_BYTESHIFT)
+ op1 = gen_rtx_CONST_INT (SImode, INTVAL (op1) * 8);
+      /* APPLE LOCAL end 5919583 */
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+ }
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ target = gen_reg_rtx (V2DImode);
+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
+ op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
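
To make the byte-shift/bit-shift split concrete, here is a sketch using hypothetical macro names (the shipped emmintrin.h spells both variants as _mm_slli_si128):

/* Mainline-style form: the header scales the byte count to bits.  */
#define _mm_slli_si128_bits(A, N) \
  ((__m128i) __builtin_ia32_pslldqi128 ((__v2di)(A), (N) * 8))

/* Byte-shift form: the byte count is passed through unchanged and the
   "*8" is applied by the expander above instead.  */
#define _mm_slli_si128_bytes(A, N) \
  ((__m128i) __builtin_ia32_pslldqi128_byteshift ((__v2di)(A), (N)))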
+
+ case IX86_BUILTIN_FEMMS:
+ emit_insn (gen_mmx_femms ());
+ return NULL_RTX;
+
+ case IX86_BUILTIN_PAVGUSB:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
+
+ case IX86_BUILTIN_PF2ID:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
+
+ case IX86_BUILTIN_PFACC:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFADD:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPEQ:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPGE:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPGT:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMAX:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMIN:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMUL:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRCP:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_PFRCPIT1:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRCPIT2:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRSQIT1:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRSQRT:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_PFSUB:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFSUBR:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PI2FD:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
+
+ case IX86_BUILTIN_PMULHRW:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
+
+ case IX86_BUILTIN_PF2IW:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
+
+ case IX86_BUILTIN_PFNACC:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFPNACC:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PI2FW:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
+
+ case IX86_BUILTIN_PSWAPDSI:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
+
+ case IX86_BUILTIN_PSWAPDSF:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_SQRTSD:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
+ case IX86_BUILTIN_LOADUPD:
+ return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
+ case IX86_BUILTIN_STOREUPD:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
+
+ case IX86_BUILTIN_MFENCE:
+ emit_insn (gen_sse2_mfence ());
+ return 0;
+ case IX86_BUILTIN_LFENCE:
+ emit_insn (gen_sse2_lfence ());
+ return 0;
+
+ case IX86_BUILTIN_CLFLUSH:
+ arg0 = TREE_VALUE (arglist);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_sse2_clflush;
+ if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+
+ emit_insn (gen_sse2_clflush (op0));
+ return 0;
+
+ case IX86_BUILTIN_MOVNTPD:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
+ case IX86_BUILTIN_MOVNTDQ:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
+ case IX86_BUILTIN_MOVNTI:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
+
+ case IX86_BUILTIN_LOADDQU:
+ return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
+ case IX86_BUILTIN_STOREDQU:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
+
+ case IX86_BUILTIN_MONITOR:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ if (!REG_P (op2))
+ op2 = copy_to_mode_reg (SImode, op2);
+ if (!TARGET_64BIT)
+ emit_insn (gen_sse3_monitor (op0, op1, op2));
+ else
+ emit_insn (gen_sse3_monitor64 (op0, op1, op2));
+ return 0;
+
+ case IX86_BUILTIN_MWAIT:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ emit_insn (gen_sse3_mwait (op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_LDDQU:
+ return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
+ target, 1);
+ /* APPLE LOCAL begin mainline */
+ case IX86_BUILTIN_PALIGNR:
+ case IX86_BUILTIN_PALIGNR128:
+ if (fcode == IX86_BUILTIN_PALIGNR)
+ {
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ icode = CODE_FOR_ssse3_palignrv1di;
+ mode = V1DImode;
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ }
+ else
+ {
+ icode = CODE_FOR_ssse3_palignrti;
+ mode = V2DImode;
+ }
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+ }
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
+ }
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ target = gen_reg_rtx (mode);
+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
+ op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case IX86_BUILTIN_MOVNTDQA:
+ return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, arglist,
+ target, 1);
+
+ case IX86_BUILTIN_MOVNTSD:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
+
+ case IX86_BUILTIN_MOVNTSS:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
+
+ case IX86_BUILTIN_INSERTQ:
+ case IX86_BUILTIN_EXTRQ:
+ icode = (fcode == IX86_BUILTIN_EXTRQ
+ ? CODE_FOR_sse4a_extrq
+ : CODE_FOR_sse4a_insertq);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_EXTRQI:
+ icode = CODE_FOR_sse4a_extrqi;
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ error ("index mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("length mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_INSERTQI:
+ icode = CODE_FOR_sse4a_insertqi;
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("index mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
+ {
+ error ("length mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ case IX86_BUILTIN_VEC_INIT_V2SI:
+ case IX86_BUILTIN_VEC_INIT_V4HI:
+ case IX86_BUILTIN_VEC_INIT_V8QI:
+ return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
+
+ case IX86_BUILTIN_VEC_EXT_V2DF:
+ case IX86_BUILTIN_VEC_EXT_V2DI:
+ case IX86_BUILTIN_VEC_EXT_V4SF:
+ case IX86_BUILTIN_VEC_EXT_V4SI:
+ case IX86_BUILTIN_VEC_EXT_V8HI:
+ case IX86_BUILTIN_VEC_EXT_V16QI:
+ case IX86_BUILTIN_VEC_EXT_V2SI:
+ case IX86_BUILTIN_VEC_EXT_V4HI:
+ return ix86_expand_vec_ext_builtin (arglist, target);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case IX86_BUILTIN_VEC_SET_V2DI:
+ case IX86_BUILTIN_VEC_SET_V4SF:
+ case IX86_BUILTIN_VEC_SET_V4SI:
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ case IX86_BUILTIN_VEC_SET_V8HI:
+ case IX86_BUILTIN_VEC_SET_V4HI:
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ case IX86_BUILTIN_VEC_SET_V16QI:
+ return ix86_expand_vec_set_builtin (arglist);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case IX86_BUILTIN_INFQ:
+ {
+ REAL_VALUE_TYPE inf;
+ rtx tmp;
+
+ real_inf (&inf);
+ tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
+
+ tmp = validize_mem (force_const_mem (mode, tmp));
+
+ if (target == 0)
+ target = gen_reg_rtx (mode);
+
+ emit_move_insn (target, tmp);
+ return target;
+ }
+
+ case IX86_BUILTIN_FABSQ:
+ return ix86_expand_unop_builtin (CODE_FOR_abstf2, arglist, target, 0);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ default:
+ break;
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ for (i = 0, d = bdesc_sse_3arg;
+ i < ARRAY_SIZE (bdesc_sse_3arg);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_4_operands_builtin (d->icode,
+ arglist,
+ target);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ {
+ /* Compares are treated specially. */
+ if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+ || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
+ || d->icode == CODE_FOR_sse2_maskcmpv2df3
+ || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
+ return ix86_expand_sse_compare (d, arglist, target);
+
+ return ix86_expand_binop_builtin (d->icode, arglist, target);
+ }
+
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
+
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_comi (d, arglist, target);
+
+ /* APPLE LOCAL begin 4299257 */
+ for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_ucomi (d, arglist, target);
+ /* APPLE LOCAL end 4299257 */
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_ptest (d, arglist, target);
+
+ for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_crc32 (d->icode, arglist, target);
+
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpestr (d, arglist, target);
+
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpistr (d, arglist, target);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ gcc_unreachable ();
+}
+
+/* Store OPERAND to the memory after reload is completed. This means
+ that we can't easily use assign_stack_local. */
+rtx
+ix86_force_to_memory (enum machine_mode mode, rtx operand)
+{
+ rtx result;
+
+ gcc_assert (reload_completed);
+ if (TARGET_RED_ZONE)
+ {
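+ /* The red zone below the stack pointer may be used without adjusting
+ the stack pointer; store the operand there. */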
+ result = gen_rtx_MEM (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-RED_ZONE_SIZE)));
+ emit_move_insn (result, operand);
+ }
+ else if (!TARGET_RED_ZONE && TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ operand = gen_lowpart (DImode, operand);
+ /* FALLTHRU */
+ case DImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DImode,
+ gen_rtx_PRE_DEC (DImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ else
+ {
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx operands[2];
+ split_di (&operand, 1, operands, operands + 1);
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[1]));
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[0]));
+ }
+ break;
+ case HImode:
+ /* Store HImodes as SImodes. */
+ operand = gen_lowpart (SImode, operand);
+ /* FALLTHRU */
+ case SImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (operand),
+ gen_rtx_PRE_DEC (SImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ return result;
+}
+
+/* Free operand from the memory. */
+void
+ix86_free_from_memory (enum machine_mode mode)
+{
+ if (!TARGET_RED_ZONE)
+ {
+ int size;
+
+ if (mode == DImode || TARGET_64BIT)
+ size = 8;
+ else
+ size = 4;
+ /* Use LEA to deallocate stack space. In peephole2 it will be converted
+ to pop or add instruction if registers are available. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (size))));
+ }
+}
+
+/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
+ QImode must go into class Q_REGS.
+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
+ movdf to do mem-to-mem moves through integer regs. */
+enum reg_class
+ix86_preferred_reload_class (rtx x, enum reg_class class)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* We're only allowed to return a subclass of CLASS. Many of the
+ following checks fail for NO_REGS, so eliminate that early. */
+ if (class == NO_REGS)
+ return NO_REGS;
+
+ /* All classes can load zeros. */
+ if (x == CONST0_RTX (mode))
+ return class;
+
+ /* Force constants into memory if we are loading a (nonzero) constant into
+ an MMX or SSE register. This is because there are no MMX/SSE instructions
+ to load from a constant. */
+ if (CONSTANT_P (x)
+ && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
+ return NO_REGS;
+
+ /* APPLE LOCAL begin */
+ /* MERGE FIXME - ensure that 3501055 is fixed. */
+ /* MERGE FIXME - ensure that 4206991 is fixed. */
+ /* APPLE LOCAL end */
+ /* Prefer SSE regs only, if we can use them for math. */
+ if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
+ return SSE_CLASS_P (class) ? class : NO_REGS;
+
+ /* Floating-point constants need more complex checks. */
+ if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
+ {
+ /* General regs can load everything. */
+ if (reg_class_subset_p (class, GENERAL_REGS))
+ return class;
+
+ /* Floats can load 0 and 1 plus some others. Note that we eliminated
+ zero above. We only want to wind up preferring 80387 registers if
+ we plan on doing computation with them. */
+ if (TARGET_80387
+ && standard_80387_constant_p (x))
+ {
+ /* Limit class to non-sse. */
+ if (class == FLOAT_SSE_REGS)
+ return FLOAT_REGS;
+ if (class == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ if (class == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
+ return class;
+ }
+
+ return NO_REGS;
+ }
+
+ /* Generally when we see PLUS here, it's the function invariant
+ (plus soft-fp const_int), which can only be computed into general
+ regs. */
+ if (GET_CODE (x) == PLUS)
+ return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
+
+ /* QImode constants are easy to load, but non-constant QImode data
+ must go into Q_REGS. */
+ if (GET_MODE (x) == QImode && !CONSTANT_P (x))
+ {
+ if (reg_class_subset_p (class, Q_REGS))
+ return class;
+ if (reg_class_subset_p (Q_REGS, class))
+ return Q_REGS;
+ return NO_REGS;
+ }
+
+ return class;
+}
+
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+enum reg_class
+ix86_preferred_output_reload_class (rtx x, enum reg_class class)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* Restrict the output reload class to the register bank that we are doing
+ math on. If we would like not to return a subset of CLASS, reject this
+ alternative: if reload cannot do this, it will still use its choice. */
+ mode = GET_MODE (x);
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
+
+ if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
+ {
+ if (class == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ else if (class == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ else
+ return FLOAT_CLASS_P (class) ? class : NO_REGS;
+ }
+
+ return class;
+}
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers.
+
+ The macro can't work reliably when one of the CLASSES is a class containing
+ registers from multiple units (SSE, MMX, integer). We avoid this by never
+ combining those units in a single alternative in the machine description.
+ Ensure that this constraint holds to avoid unexpected surprises.
+
+ When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
+ enforce these sanity checks. */
+
+int
+ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+ enum machine_mode mode, int strict)
+{
+ if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
+ || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
+ || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
+ || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
+ || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
+ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
+ {
+ gcc_assert (!strict);
+ return true;
+ }
+
+ if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
+ return true;
+
+ /* ??? This is a lie. We do have moves between mmx/general, and for
+ mmx/sse2. But by saying we need secondary memory we discourage the
+ register allocator from using the mmx registers unless needed. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
+ return true;
+
+ if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+ {
+ /* SSE1 doesn't have any direct moves from other classes. */
+ if (!TARGET_SSE2)
+ return true;
+
+ /* If the target says that inter-unit moves are more expensive
+ than moving through memory, then don't generate them. */
+ if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
+ return true;
+
+ /* Between SSE and general, we have moves no larger than word size. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return true;
+
+ /* ??? For the cost of one register reformat penalty, we could use
+ the same instructions to move SFmode and DFmode data, but the
+ relevant move patterns don't support those alternatives. */
+ if (mode == SFmode || mode == DFmode)
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if the registers in CLASS cannot represent the change from
+ modes FROM to TO. */
+
+bool
+ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class class)
+{
+ if (from == to)
+ return false;
+
+ /* x87 registers can't do subreg at all, as all values are reformatted
+ to extended precision. */
+ if (MAYBE_FLOAT_CLASS_P (class))
+ return true;
+
+ if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
+ {
+ /* Vector registers do not support QI or HImode loads. If we don't
+ disallow a change to these modes, reload will assume it's ok to
+ drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
+ the vec_dupv4hi pattern. */
+ if (GET_MODE_SIZE (from) < 4)
+ return true;
+
+ /* Vector registers do not support subreg with nonzero offsets, which
+ are otherwise valid for integer registers. Since we can't see
+ whether we have a nonzero offset from here, prohibit all
+ nonparadoxical subregs changing size. */
+ if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return the cost of moving data from a register in class CLASS1 to
+ one in class CLASS2.
+
+ It is not required that the cost always equal 2 when FROM is the same as TO;
+ on some machines it is expensive to move between registers if they are not
+ general registers. */
+
+int
+ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
+ enum reg_class class2)
+{
+ /* If we require secondary memory, compute the cost of the store followed
+ by the load. In order to avoid bad register allocation choices, we need
+ this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
+
+ if (ix86_secondary_memory_needed (class1, class2, mode, 0))
+ {
+ int cost = 1;
+
+ cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
+ MEMORY_MOVE_COST (mode, class1, 1));
+ cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
+ MEMORY_MOVE_COST (mode, class2, 1));
+
+ /* When copying from a general purpose register we may emit multiple
+ stores followed by a single load, causing a memory size mismatch stall.
+ Count this as an arbitrarily high cost of 20. */
+ if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ cost += 20;
+
+ /* In the case of FP/MMX moves, the registers actually overlap, and we
+ have to switch modes in order to treat them differently. */
+ if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
+ || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
+ cost += 20;
+
+ return cost;
+ }
+
+ /* Moves between SSE/MMX and integer unit are expensive. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
+ || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+ return ix86_cost->mmxsse_to_integer;
+ if (MAYBE_FLOAT_CLASS_P (class1))
+ return ix86_cost->fp_move;
+ if (MAYBE_SSE_CLASS_P (class1))
+ return ix86_cost->sse_move;
+ if (MAYBE_MMX_CLASS_P (class1))
+ return ix86_cost->mmx_move;
+ return 2;
+}
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+bool
+ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ /* Only the flags register can hold CCmode values, and it holds nothing else. */
+ if (CC_REGNO_P (regno))
+ return GET_MODE_CLASS (mode) == MODE_CC;
+ if (GET_MODE_CLASS (mode) == MODE_CC
+ || GET_MODE_CLASS (mode) == MODE_RANDOM
+ || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+ return 0;
+ if (FP_REGNO_P (regno))
+ return VALID_FP_MODE_P (mode);
+ if (SSE_REGNO_P (regno))
+ {
+ /* We implement the move patterns for all vector modes into and
+ out of SSE registers, even when no operation instructions
+ are available. */
+ return (VALID_SSE_REG_MODE (mode)
+ || VALID_SSE2_REG_MODE (mode)
+ || VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+ if (MMX_REGNO_P (regno))
+ {
+ /* We implement the move patterns for 3DNOW modes even in MMX mode,
+ so if the register is available at all, then we can move data of
+ the given mode into or out of it. */
+ return (VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+
+ if (mode == QImode)
+ {
+ /* Take care with QImode values - they can be in non-QI regs,
+ but then they do cause partial register stalls. */
+ if (regno < 4 || TARGET_64BIT)
+ return 1;
+ if (!TARGET_PARTIAL_REG_STALL)
+ return 1;
+ return reload_in_progress || reload_completed;
+ }
+ /* We handle both integers and floats in the general purpose registers. */
+ else if (VALID_INT_MODE_P (mode))
+ return 1;
+ else if (VALID_FP_MODE_P (mode))
+ return 1;
+ /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
+ on to use that value in smaller contexts, this can easily force a
+ pseudo to be allocated to GENERAL_REGS. Since this is no worse than
+ supporting DImode, allow it. */
+ else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
+ return 1;
+
+ return 0;
+}
+
+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
+ tieable integer mode. */
+
+static bool
+ix86_tieable_integer_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ return true;
+
+ case QImode:
+ return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
+
+ case DImode:
+ /* APPLE LOCAL 5695218 convert int to logical bool */
+ return !!TARGET_64BIT;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if MODE1 is accessible in a register that can hold MODE2
+ without copying. That is, all register classes that can hold MODE2
+ can also hold MODE1. */
+
+bool
+ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ if (mode1 == mode2)
+ return true;
+
+ if (ix86_tieable_integer_mode_p (mode1)
+ && ix86_tieable_integer_mode_p (mode2))
+ return true;
+
+ /* MODE2 being XFmode implies fp stack or general regs, which means we
+ can tie any smaller floating point modes to it. Note that we do not
+ tie this with TFmode. */
+ if (mode2 == XFmode)
+ return mode1 == SFmode || mode1 == DFmode;
+
+ /* MODE2 being DFmode implies fp stack, general or sse regs, which means
+ that we can tie it with SFmode. */
+ if (mode2 == DFmode)
+ return mode1 == SFmode;
+
+ /* If MODE2 is only appropriate for an SSE register, then tie with
+ any other mode acceptable to SSE registers. */
+ if (GET_MODE_SIZE (mode2) >= 8
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+ return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
+
+ /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
+ with any other mode acceptable to MMX registers. */
+ if (GET_MODE_SIZE (mode2) == 8
+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
+ return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
+
+ return false;
+}
+
+/* Return the cost of moving data of mode M between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ If moving between registers and memory is more expensive than
+ between two registers, you should define this macro to express the
+ relative cost.
+
+ Also model the increased cost of moving QImode registers in
+ non-Q_REGS classes.
+ */
+int
+ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
+{
+ if (FLOAT_CLASS_P (class))
+ {
+ int index;
+ switch (mode)
+ {
+ case SFmode:
+ index = 0;
+ break;
+ case DFmode:
+ index = 1;
+ break;
+ case XFmode:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+ }
+ if (SSE_CLASS_P (class))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ case 16:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+ }
+ if (MMX_CLASS_P (class))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ default:
+ return 100;
+ }
+ return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+ }
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ if (in)
+ return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
+ : ix86_cost->movzbl_load);
+ else
+ return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
+ : ix86_cost->int_store[0] + 4);
+ break;
+ case 2:
+ return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
+ default:
+ /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
+ if (mode == TFmode)
+ mode = XFmode;
+ return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
+ * (((int) GET_MODE_SIZE (mode)
+ + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+ }
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
+ *total = 3;
+ else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
+ *total = 2;
+ else if (flag_pic && SYMBOLIC_CONST (x)
+ && (!TARGET_64BIT
+ || (GET_CODE (x) != LABEL_REF
+ && (GET_CODE (x) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (x)))))
+ *total = 1;
+ else
+ *total = 0;
+ return true;
+
+ case CONST_DOUBLE:
+ if (mode == VOIDmode)
+ *total = 0;
+ else
+ switch (standard_80387_constant_p (x))
+ {
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default: /* Other constants */
+ *total = 2;
+ break;
+ case 0:
+ case -1:
+ /* Start with (MEM (SYMBOL_REF)), since that's where
+ it'll probably end up. Add a penalty for size. */
+ *total = (COSTS_N_INSNS (1)
+ + (flag_pic != 0 && !TARGET_64BIT)
+ + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
+ break;
+ }
+ return true;
+
+ case ZERO_EXTEND:
+ /* Zero extension is often completely free on x86_64, so make
+ it as cheap as possible. */
+ if (TARGET_64BIT && mode == DImode
+ && GET_MODE (XEXP (x, 0)) == SImode)
+ *total = 1;
+ else if (TARGET_ZERO_EXTEND_WITH_AND)
+ *total = ix86_cost->add;
+ else
+ *total = ix86_cost->movzx;
+ return false;
+
+ case SIGN_EXTEND:
+ *total = ix86_cost->movsx;
+ return false;
+
+ case ASHIFT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+ {
+ HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ if (value == 1)
+ {
+ *total = ix86_cost->add;
+ return false;
+ }
+ if ((value == 2 || value == 3)
+ && ix86_cost->lea <= ix86_cost->shift_const)
+ {
+ *total = ix86_cost->lea;
+ return false;
+ }
+ }
+ /* FALLTHRU */
+
+ case ROTATE:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ if (INTVAL (XEXP (x, 1)) > 32)
+ *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
+ else
+ *total = ix86_cost->shift_const * 2;
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == AND)
+ *total = ix86_cost->shift_var * 2;
+ else
+ *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
+ }
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ *total = ix86_cost->shift_const;
+ else
+ *total = ix86_cost->shift_var;
+ }
+ return false;
+
+ case MULT:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fmul;
+ return false;
+ }
+ else
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ int nbits;
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ for (nbits = 0; value != 0; value &= value - 1)
+ nbits++;
+ }
+ else
+ /* This is arbitrary. */
+ nbits = 7;
+
+ /* Compute costs correctly for widening multiplication. */
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
+ == GET_MODE_SIZE (mode))
+ {
+ int is_mulwiden = 0;
+ enum machine_mode inner_mode = GET_MODE (op0);
+
+ if (GET_CODE (op0) == GET_CODE (op1))
+ is_mulwiden = 1, op1 = XEXP (op1, 0);
+ else if (GET_CODE (op1) == CONST_INT)
+ {
+ if (GET_CODE (op0) == SIGN_EXTEND)
+ is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
+ == INTVAL (op1);
+ else
+ is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
+ }
+
+ if (is_mulwiden)
+ op0 = XEXP (op0, 0), mode = GET_MODE (op0);
+ }
+
+ *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
+ + nbits * ix86_cost->mult_bit
+ + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
+
+ return true;
+ }
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fdiv;
+ else
+ *total = ix86_cost->divide[MODE_INDEX (mode)];
+ return false;
+
+ case PLUS:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fadd;
+ else if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
+ {
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = ix86_cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = ix86_cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ *total = ix86_cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ case MINUS:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fadd;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (!TARGET_64BIT && mode == DImode)
+ {
+ *total = (ix86_cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code)
+ << (GET_MODE (XEXP (x, 0)) != DImode))
+ + (rtx_cost (XEXP (x, 1), outer_code)
+ << (GET_MODE (XEXP (x, 1)) != DImode)));
+ return true;
+ }
+ /* FALLTHRU */
+
+ case NEG:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fchs;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case NOT:
+ if (!TARGET_64BIT && mode == DImode)
+ *total = ix86_cost->add * 2;
+ else
+ *total = ix86_cost->add;
+ return false;
+
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && XEXP (XEXP (x, 0), 1) == const1_rtx
+ && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
+ && XEXP (x, 1) == const0_rtx)
+ {
+ /* This kind of construct is implemented using test[bwl].
+ Treat it as if we had an AND. */
+ *total = (ix86_cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ + rtx_cost (const1_rtx, outer_code));
+ return true;
+ }
+ return false;
+
+ case FLOAT_EXTEND:
+ if (!TARGET_SSE_MATH
+ || mode == XFmode
+ || (mode == DFmode && !TARGET_SSE2))
+ /* For standard 80387 constants, raise the cost to prevent
+ compress_float_constant() from generating a load from memory. */
+ switch (standard_80387_constant_p (XEXP (x, 0)))
+ {
+ case -1:
+ case 0:
+ *total = 0;
+ break;
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default:
+ *total = (x86_ext_80387_constants & TUNEMASK
+ || optimize_size
+ ? 1 : 0);
+ }
+ return false;
+
+ case ABS:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fabs;
+ return false;
+
+ case SQRT:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fsqrt;
+ return false;
+
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_TP)
+ *total = 0;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+#if TARGET_MACHO
+
+static int current_machopic_label_num;
+
+/* Given a symbol name and its associated stub, write out the
+ definition of the stub. */
+
+void
+machopic_output_stub (FILE *file, const char *symb, const char *stub)
+{
+ unsigned int length;
+ char *binder_name, *symbol_name, lazy_ptr_name[32];
+ int label = ++current_machopic_label_num;
+
+ /* For 64-bit we shouldn't get here. */
+ gcc_assert (!TARGET_64BIT);
+
+ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
+ symb = (*targetm.strip_name_encoding) (symb);
+
+ length = strlen (stub);
+ binder_name = alloca (length + 32);
+ GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
+
+ length = strlen (symb);
+ symbol_name = alloca (length + 32);
+ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
+
+ sprintf (lazy_ptr_name, "L%d$lz", label);
+
+ /* APPLE LOCAL begin deep branch prediction pic-base */
+ /* APPLE LOCAL begin AT&T-style stub 4164563 */
+ /* Choose one of four possible sections for this stub. */
+ if (MACHOPIC_ATT_STUB)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
+ else if (MACHOPIC_PURE)
+ /* APPLE LOCAL end AT&T-style stub 4164563 */
+ {
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
+ else
+ switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
+ }
+ else
+ /* APPLE LOCAL end deep branch prediction pic-base */
+ switch_to_section (darwin_sections[machopic_symbol_stub_section]);
+
+ fprintf (file, "%s:\n", stub);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+
+ /* APPLE LOCAL begin use %ecx in stubs 4146993 */
+ /* APPLE LOCAL begin deep branch prediction pic-base */
+ /* APPLE LOCAL begin AT&T-style stub 4164563 */
+ if (MACHOPIC_ATT_STUB)
+ {
+ fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
+ }
+ else if (MACHOPIC_PURE)
+ /* APPLE LOCAL end AT&T-style stub 4164563 */
+ {
+ /* PIC stub. */
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ {
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
+ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
+ }
+ else
+ {
+ /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
+ fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
+ fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
+ }
+ fprintf (file, "\tjmp\t*%%ecx\n");
+ }
+ else /* 16-byte -mdynamic-no-pic stub. */
+ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
+
+ /* APPLE LOCAL begin AT&T-style stub 4164563 */
+ /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
+ it needs no stub-binding-helper. */
+ if (MACHOPIC_ATT_STUB)
+ return;
+ /* APPLE LOCAL end AT&T-style stub 4164563 */
+
+ /* The "stub_binding_helper" is a fragment that gets executed only
+ once, the first time this stub is invoked (then it becomes "dead
+ code"). It asks the dynamic linker to set the
+ lazy_symbol_pointer to point at the function we want
+ (e.g. printf) so that subsequent invocations of this stub go
+ directly to that dynamically-linked callee. Other UN*X systems
+ use similar stubs, but those are generated by the static linker
+ and never appear in assembly files. */
+ /* APPLE LOCAL end deep branch prediction pic-base */
+ fprintf (file, "%s:\n", binder_name);
+
+ /* APPLE LOCAL begin deep branch prediction pic-base * tabify insns */
+ if (MACHOPIC_PURE)
+ {
+ fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
+ fprintf (file, "\tpushl\t%%ecx\n");
+ }
+ else
+ fprintf (file, "\t pushl\t$%s\n", lazy_ptr_name);
+
+ fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
+ /* APPLE LOCAL end deep branch prediction pic-base * tabify insns */
+ /* APPLE LOCAL end use %ecx in stubs 4146993 */
+
+ /* APPLE LOCAL begin deep branch prediction pic-base. */
+ /* N.B. Keep the correspondence of these
+ 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
+ old-pic/new-pic/non-pic stubs; altering this will break
+ compatibility with existing dylibs. */
+ if (MACHOPIC_PURE)
+ {
+ /* PIC stubs. */
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
+ else
+ /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ }
+ else
+ /* 16-byte -mdynamic-no-pic stub. */
+ switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
+
+ fprintf (file, "%s:\n", lazy_ptr_name);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+ fprintf (file, "\t.long\t%s\n", binder_name);
+}
+/* APPLE LOCAL end deep branch prediction pic-base */
+
+void
+darwin_x86_file_end (void)
+{
+ darwin_file_end ();
+ ix86_file_end ();
+}
+
+/* APPLE LOCAL begin 4457939 stack alignment mishandled */
+void
+ix86_darwin_init_expanders (void)
+{
+ /* <rdar://problem/4471596> stack alignment is not handled properly
+
+ Please remove this entire function when addressing this
+ Radar. Please be sure to delete the definition of INIT_EXPANDERS
+ in i386/darwin.h as well. */
+ /* Darwin/x86_32 stack pointer will be 16-byte aligned at every
+ CALL, but the frame pointer, when used, will be offset 8 bytes
+ from a 16-byte alignment (the size of the return address and the
+ saved frame pointer). */
+ if (cfun && cfun->emit
+ && cfun->emit->regno_pointer_align)
+ {
+ REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = STACK_BOUNDARY;
+ REGNO_POINTER_ALIGN (FRAME_POINTER_REGNUM) = BITS_PER_WORD;
+ REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_WORD;
+ REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = BITS_PER_WORD;
+ }
+}
+/* APPLE LOCAL end 4457939 stack alignment mishandled */
+#endif /* TARGET_MACHO */
+
+/* Order the registers for register allocator. */
+
+void
+x86_order_regs_for_local_alloc (void)
+{
+ int pos = 0;
+ int i;
+
+ /* First allocate the local general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* Global general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && !call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers come first in case we are doing FP math
+ using them. */
+ if (!TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* SSE registers. */
+ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers. */
+ if (TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* Initialize the rest of the array, as we do not allocate some registers
+ at all. */
+ while (pos < FIRST_PSEUDO_REGISTER)
+ reg_alloc_order [pos++] = 0;
+}
+
+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_struct_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree *type = NULL;
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ type = &TREE_TYPE (*node);
+ }
+ else
+ type = node;
+
+ if (!(type && (TREE_CODE (*type) == RECORD_TYPE
+ || TREE_CODE (*type) == UNION_TYPE)))
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ else if ((is_attribute_p ("ms_struct", name)
+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
+ || ((is_attribute_p ("gcc_struct", name)
+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
+ {
+ warning (OPT_Wattributes, "%qs incompatible attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static bool
+ix86_ms_bitfield_layout_p (tree record_type)
+{
+ return (TARGET_MS_BITFIELD_LAYOUT &&
+ !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
+}
+
+/* Returns an expression indicating where the this parameter is
+ located on entry to the FUNCTION. */
+
+static rtx
+x86_this_parameter (tree function)
+{
+ tree type = TREE_TYPE (function);
+
+ if (TARGET_64BIT)
+ {
+ int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
+ return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
+ }
+
+ if (ix86_function_regparm (type, function) > 0)
+ {
+ tree parm;
+
+ parm = TYPE_ARG_TYPES (type);
+ /* Figure out whether or not the function has a variable number of
+ arguments. */
+ for (; parm; parm = TREE_CHAIN (parm))
+ if (TREE_VALUE (parm) == void_type_node)
+ break;
+ /* If not, the this parameter is in the first argument. */
+ if (parm)
+ {
+ int regno = 0;
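+ /* With the fastcall convention the first argument, and hence the
+ this pointer, is passed in %ecx (hard register 2). */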
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ regno = 2;
+ return gen_rtx_REG (SImode, regno);
+ }
+ }
+
+ if (aggregate_value_p (TREE_TYPE (type), type))
+ return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
+ else
+ return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
+}
+
+/* Determine whether x86_output_mi_thunk can succeed. */
+
+static bool
+x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ /* 64-bit can handle anything. */
+ if (TARGET_64BIT)
+ return true;
+
+ /* For 32-bit, everything's fine if we have one free register. */
+ if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
+ return true;
+
+ /* Need a free register for vcall_offset. */
+ if (vcall_offset)
+ return false;
+
+ /* Need a free register for GOT references. */
+ if (flag_pic && !(*targetm.binds_local_p) (function))
+ return false;
+
+ /* Otherwise ok. */
+ return true;
+}
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
+
+static void
+x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
+ tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ rtx xops[3];
+ rtx this = x86_this_parameter (function);
+ rtx this_reg, tmp;
+
+ /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
+ pull it in now and let DELTA benefit. */
+ if (REG_P (this))
+ this_reg = this;
+ else if (vcall_offset)
+ {
+ /* Put the this parameter into %eax. */
+ xops[0] = this;
+ xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ this_reg = NULL_RTX;
+
+ /* Adjust the this parameter by a fixed constant. */
+ if (delta)
+ {
+ xops[0] = GEN_INT (delta);
+ xops[1] = this_reg ? this_reg : this;
+ if (TARGET_64BIT)
+ {
+ if (!x86_64_general_operand (xops[0], DImode))
+ {
+ tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
+ xops[1] = tmp;
+ output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
+ xops[0] = tmp;
+ xops[1] = this;
+ }
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* Adjust the this parameter by a value stored in the vtable. */
+ if (vcall_offset)
+ {
+ if (TARGET_64BIT)
+ tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
+ else
+ {
+ int tmp_regno = 2 /* ECX */;
+ if (lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ tmp_regno = 0 /* EAX */;
+ tmp = gen_rtx_REG (SImode, tmp_regno);
+ }
+
+ xops[0] = gen_rtx_MEM (Pmode, this_reg);
+ xops[1] = tmp;
+ if (TARGET_64BIT)
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+
+ /* Adjust the this parameter. */
+ xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
+ if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
+ {
+ rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ xops[0] = GEN_INT (vcall_offset);
+ xops[1] = tmp2;
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
+ }
+ xops[1] = this_reg;
+ if (TARGET_64BIT)
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* If necessary, drop THIS back to its stack slot. */
+ if (this_reg && this_reg != this)
+ {
+ xops[0] = this_reg;
+ xops[1] = this;
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ xops[0] = XEXP (DECL_RTL (function), 0);
+ if (TARGET_64BIT)
+ {
+ if (!flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ else
+ {
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
+ tmp = gen_rtx_CONST (Pmode, tmp);
+ tmp = gen_rtx_MEM (QImode, tmp);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%A0", xops);
+ }
+ }
+ else
+ {
+ if (!flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ else
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ rtx sym_ref = XEXP (DECL_RTL (function), 0);
+ /* APPLE LOCAL begin axe stubs 5571540 */
+ if (darwin_stubs)
+ sym_ref = (gen_rtx_SYMBOL_REF
+ (Pmode,
+ machopic_indirection_name (sym_ref, /*stub_p=*/true)));
+ tmp = gen_rtx_MEM (QImode, sym_ref);
+ /* APPLE LOCAL end axe stubs 5571540 */
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%0", xops);
+ }
+ else
+#endif /* TARGET_MACHO */
+ {
+ tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX);
+
+ xops[1] = tmp;
+ output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
+ output_asm_insn ("jmp\t{*}%1", xops);
+ }
+ }
+}
+
+static void
+x86_file_start (void)
+{
+ default_file_start ();
+#if TARGET_MACHO
+ darwin_file_start ();
+#endif
+ if (X86_FILE_START_VERSION_DIRECTIVE)
+ fputs ("\t.version\t\"01.01\"\n", asm_out_file);
+ if (X86_FILE_START_FLTUSED)
+ fputs ("\t.global\t__fltused\n", asm_out_file);
+ if (ix86_asm_dialect == ASM_INTEL)
+ fputs ("\t.intel_syntax\n", asm_out_file);
+}
+
+int
+x86_field_alignment (tree field, int computed)
+{
+ enum machine_mode mode;
+ tree type = TREE_TYPE (field);
+
+ if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
+ return computed;
+ /* APPLE LOCAL begin mac68k alignment */
+#if TARGET_MACHO
+ if (OPTION_ALIGN_MAC68K)
+ {
+ if (computed >= 128)
+ return computed;
+ return MIN (computed, 16);
+ }
+#endif
+ /* APPLE LOCAL end mac68k alignment */
+ mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
+ ? get_inner_array_type (type) : type);
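+ /* Without -malign-double, doubles, long longs and their complex
+ variants get at most 32-bit alignment. */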
+ if (mode == DFmode || mode == DCmode
+ || GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
+ return MIN (32, computed);
+ return computed;
+}
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+void
+x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ if (TARGET_64BIT)
+ if (flag_pic)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
+#endif
+ fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
+ }
+ else
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
+#endif
+ fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+ }
+ else if (flag_pic)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
+ LPREFIX, labelno, PROFILE_COUNT_REGISTER);
+#endif
+ fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
+ }
+ else
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
+ PROFILE_COUNT_REGISTER);
+#endif
+ fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+ }
+}
+
+/* We don't have exact information about the insn sizes, but we may assume
+ quite safely that we are informed about all 1 byte insns and memory
+ address sizes. This is enough to eliminate unnecessary padding in
+ 99% of cases. */
+
+static int
+min_insn_size (rtx insn)
+{
+ int l = 0;
+
+ if (!INSN_P (insn) || !active_insn_p (insn))
+ return 0;
+
+ /* Discard alignments we've emitted, and jump instructions. */
+ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+ return 0;
+ if (GET_CODE (insn) == JUMP_INSN
+ && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+ return 0;
+
+ /* Important case - calls are always 5 bytes.
+ It is common to have many calls in a row. */
+ if (GET_CODE (insn) == CALL_INSN
+ && symbolic_reference_mentioned_p (PATTERN (insn))
+ && !SIBLING_CALL_P (insn))
+ return 5;
+ if (get_attr_length (insn) <= 1)
+ return 1;
+
+ /* For normal instructions we may rely on the sizes of addresses
+ and the presence of a symbol to require 4 bytes of encoding.
+ This is not the case for jumps, where references are PC relative. */
+ if (GET_CODE (insn) != JUMP_INSN)
+ {
+ l = get_attr_length_address (insn);
+ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+ l = 4;
+ }
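+ /* One opcode byte plus the estimated address length when we have one;
+ otherwise assume 2 bytes. */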
+ if (l)
+ return 1+l;
+ else
+ return 2;
+}
+
+ /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
+ 16-byte window. */
+
+static void
+ix86_avoid_jump_misspredicts (void)
+{
+ rtx insn, start = get_insns ();
+ int nbytes = 0, njumps = 0;
+ int isjump = 0;
+
+ /* Look for all minimal intervals of instructions containing 4 jumps.
+ The intervals are bounded by START and INSN. NBYTES is the total
+ size of the instructions in the interval, including INSN but not
+ including START. When NBYTES is smaller than 16 bytes, it is
+ possible that START and INSN end up in the same 16-byte page.
+
+ The smallest offset at which INSN can start within that page is the
+ case where START ends at offset 0. The offset of INSN is then
+ NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
+ maxskip 17 - NBYTES + sizeof (INSN).
+ */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+
+ nbytes += min_insn_size (insn);
+ if (dump_file)
+ fprintf(dump_file, "Insn %i estimated to %i bytes\n",
+ INSN_UID (insn), min_insn_size (insn));
+ if ((GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ || GET_CODE (insn) == CALL_INSN)
+ njumps++;
+ else
+ continue;
+
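+ /* Shrink the interval from the left until it contains at most 3 jumps,
+ subtracting the size of each instruction that drops out. */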
+ while (njumps > 3)
+ {
+ start = NEXT_INSN (start);
+ if ((GET_CODE (start) == JUMP_INSN
+ && GET_CODE (PATTERN (start)) != ADDR_VEC
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+ || GET_CODE (start) == CALL_INSN)
+ njumps--, isjump = 1;
+ else
+ isjump = 0;
+ nbytes -= min_insn_size (start);
+ }
+ gcc_assert (njumps >= 0);
+ if (dump_file)
+ fprintf (dump_file, "Interval %i to %i has %i bytes\n",
+ INSN_UID (start), INSN_UID (insn), nbytes);
+
+ if (njumps == 3 && isjump && nbytes < 16)
+ {
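+ /* Pad before INSN so that it cannot share a 16-byte window with the
+ three preceding jumps. */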
+ int padsize = 15 - nbytes + min_insn_size (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Padding insn %i by %i bytes!\n",
+ INSN_UID (insn), padsize);
+ emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+ }
+ }
+}
+
+/* AMD Athlon works faster
+ when RET is not the destination of a conditional jump or directly preceded
+ by another jump instruction. We avoid the penalty by inserting a NOP just
+ before the RET instruction in such cases. */
+static void
+ix86_pad_returns (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ basic_block bb = e->src;
+ rtx ret = BB_END (bb);
+ rtx prev;
+ bool replace = false;
+
+ if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
+ || !maybe_hot_bb_p (bb))
+ continue;
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+ if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
+ break;
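+ /* The RET is preceded by a label; check whether any predecessor reaches
+ it through a non-fallthru edge, i.e. the RET is a jump target. */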
+ if (prev && GET_CODE (prev) == CODE_LABEL)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (EDGE_FREQUENCY (e) && e->src->index >= 0
+ && !(e->flags & EDGE_FALLTHRU))
+ replace = true;
+ }
+ if (!replace)
+ {
+ prev = prev_active_insn (ret);
+ if (prev
+ && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
+ || GET_CODE (prev) == CALL_INSN))
+ replace = true;
+ /* Empty functions get a branch mispredict even when the jump destination
+ is not visible to us. */
+ if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ replace = true;
+ }
+ if (replace)
+ {
+ emit_insn_before (gen_return_internal_long (), ret);
+ delete_insn (ret);
+ }
+ }
+}
+
+/* Implement machine specific optimizations. We implement padding of returns
+ for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
+static void
+ix86_reorg (void)
+{
+ if (TARGET_PAD_RETURNS && optimize && !optimize_size)
+ ix86_pad_returns ();
+ if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
+ ix86_avoid_jump_misspredicts ();
+}
+
+/* Return nonzero when a QImode register that must be represented via a REX prefix
+ is used. */
+bool
+x86_extended_QIreg_mentioned_p (rtx insn)
+{
+ int i;
+ extract_insn_cached (insn);
+ for (i = 0; i < recog_data.n_operands; i++)
+ if (REG_P (recog_data.operand[i])
+ && REGNO (recog_data.operand[i]) >= 4)
+ return true;
+ return false;
+}
+
+/* Return nonzero when P points to a register encoded via a REX prefix.
+ Called via for_each_rtx. */
+static int
+extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
+{
+ unsigned int regno;
+ if (!REG_P (*p))
+ return 0;
+ regno = REGNO (*p);
+ return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
+}
+
+/* Return true when INSN mentions a register that must be encoded using a REX
+ prefix. */
+bool
+x86_extended_reg_mentioned_p (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
+}
+
+/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
+
+void
+x86_emit_floatuns (rtx operands[2])
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+ enum machine_mode mode, inmode;
+
+ inmode = GET_MODE (operands[1]);
+ /* APPLE LOCAL begin 4176531 4424891 */
+ mode = GET_MODE (operands[0]);
+ if (!TARGET_64BIT && mode == DFmode && !optimize_size)
+ {
+ switch (inmode)
+ {
+ case SImode:
+ ix86_expand_convert_uns_SI2DF_sse (operands);
+ break;
+ case DImode:
+ ix86_expand_convert_uns_DI2DF_sse (operands);
+ break;
+ default:
+ abort ();
+ break;
+ }
+ return;
+ }
+ /* APPLE LOCAL end 4176531 4424891 */
+
+ out = operands[0];
+ in = force_reg (inmode, operands[1]);
+ /* APPLE LOCAL begin one line deletion 4424891 */
+ /* APPLE LOCAL end one line deletion 4424891 */
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ i1 = gen_reg_rtx (Pmode);
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
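+ /* The value has its sign bit set and is out of signed range: halve it,
+ fold the dropped low bit back in with IOR, convert, then double. */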
+
+ i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
+ i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
+ i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+ expand_float (f0, i0, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ with all elements equal to VAR. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx val)
+{
+ enum machine_mode smode, wsmode, wvmode;
+ rtx x;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ case V4SFmode:
+ case V4SImode:
+ val = force_reg (GET_MODE_INNER (mode), val);
+ x = gen_rtx_VEC_DUPLICATE (mode, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4HImode:
+ if (!mmx_ok)
+ return false;
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ {
+ val = gen_lowpart (SImode, val);
+ x = gen_rtx_TRUNCATE (HImode, val);
+ x = gen_rtx_VEC_DUPLICATE (mode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+ }
+ else
+ {
+ smode = HImode;
+ wsmode = SImode;
+ wvmode = V2SImode;
+ goto widen;
+ }
+
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ smode = QImode;
+ wsmode = HImode;
+ wvmode = V4HImode;
+ goto widen;
+ case V8HImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend HImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V8HImode vector. */
+ tmp1 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
+ /* Duplicate the low short through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
+ /* Cast the V8HImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode vector back to V8HImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
+ return true;
+ }
+ smode = HImode;
+ wsmode = SImode;
+ wvmode = V4SImode;
+ goto widen;
+ case V16QImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend QImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V16QImode vector. */
+ tmp1 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
+ /* Duplicate the low byte through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ /* Cast the V16QImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode vector back to V16QImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
+ return true;
+ }
+ smode = QImode;
+ wsmode = HImode;
+ wvmode = V8HImode;
+ goto widen;
+ widen:
+ /* Replicate the value once into the next wider mode and recurse. */
+ val = convert_modes (wsmode, smode, val, true);
+ x = expand_simple_binop (wsmode, ASHIFT, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+ x = gen_reg_rtx (wvmode);
+ if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
+ gcc_unreachable ();
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ whose ONE_VAR element is VAR, and other elements are zero. Return true
+ if successful. */
+
+static bool
+ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx var, int one_var)
+{
+ enum machine_mode vsimode;
+ rtx new_target;
+ rtx x, tmp;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ if (one_var != 0)
+ return false;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4SFmode:
+ case V4SImode:
+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ new_target = gen_reg_rtx (mode);
+ else
+ new_target = target;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_DUPLICATE (mode, var);
+ x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
+ if (one_var != 0)
+ {
+ /* We need to shuffle the value to the correct position, so
+ create a new pseudo to store the intermediate result. */
+
+ /* With SSE2, we can use the integer shuffle insns. */
+ if (mode != V4SFmode && TARGET_SSE2)
+ {
+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0 : 1),
+ GEN_INT (one_var == 3 ? 0 : 1)));
+ if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+ }
+
+ /* Otherwise convert the intermediate result to V4SFmode and
+ use the SSE1 shuffle instructions. */
+ if (mode != V4SFmode)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
+ }
+ else
+ tmp = new_target;
+
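+ /* In the sse_shufps_1 pattern, selector values 0..3 pick lanes from the
+ first input and 4..7 pick lanes from the second, hence the +4 on the
+ selectors that must come from the second operand. */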
+ emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0+4 : 1+4),
+ GEN_INT (one_var == 3 ? 0+4 : 1+4)));
+
+ if (mode != V4SFmode)
+ emit_move_insn (target, gen_lowpart (V4SImode, tmp));
+ else if (tmp != target)
+ emit_move_insn (target, tmp);
+ }
+ else if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+
+ case V8HImode:
+ case V16QImode:
+ vsimode = V4SImode;
+ goto widen;
+ case V4HImode:
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ vsimode = V2SImode;
+ goto widen;
+ widen:
+ if (one_var != 0)
+ return false;
+
+ /* Zero extend the variable element to SImode and recurse. */
+ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
+
+ x = gen_reg_rtx (vsimode);
+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
+ var, one_var))
+ gcc_unreachable ();
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ consisting of the values in VALS. It is known that all elements
+ except ONE_VAR are constants. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals, int one_var)
+{
+ rtx var = XVECEXP (vals, 0, one_var);
+ enum machine_mode wmode;
+ rtx const_vec, x;
+
+ const_vec = copy_rtx (vals);
+ XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
+
+ switch (mode)
+ {
+ case V2DFmode:
+ case V2DImode:
+ case V2SFmode:
+ case V2SImode:
+ /* For the two element vectors, it's just as easy to use
+ the general case. */
+ return false;
+
+ case V4SFmode:
+ case V4SImode:
+ case V8HImode:
+ case V4HImode:
+ break;
+
+ case V16QImode:
+ wmode = V8HImode;
+ goto widen;
+ case V8QImode:
+ wmode = V4HImode;
+ goto widen;
+ widen:
+ /* There's no way to set one QImode entry easily. Combine
+ the variable value with its adjacent constant value, and
+ promote to an HImode set. */
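+ /* For example, with one_var == 5 the byte pair (elements 4 and 5) is
+ packed into a single HImode value with the variable byte in bits
+ 8..15, and stored at HImode position one_var >> 1 == 2. */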
+ x = XVECEXP (vals, 0, one_var ^ 1);
+ if (one_var & 1)
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ x = GEN_INT (INTVAL (x) & 0xff);
+ }
+ else
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ x = gen_int_mode (INTVAL (x) << 8, HImode);
+ }
+ if (x != const0_rtx)
+ var = expand_simple_binop (HImode, IOR, var, x, var,
+ 1, OPTAB_LIB_WIDEN);
+
+ x = gen_reg_rtx (wmode);
+ emit_move_insn (x, gen_lowpart (wmode, const_vec));
+ ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+
+ emit_move_insn (target, const_vec);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+}
+
+/* A subroutine of ix86_expand_vector_init. Handle the most general case:
+ all values variable, and none identical. */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals)
+{
+ enum machine_mode half_mode = GET_MODE_INNER (mode);
+ rtx op0 = NULL, op1 = NULL;
+ bool use_vec_concat = false;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok && !TARGET_SSE)
+ break;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ /* For the two element vectors, we always implement VEC_CONCAT. */
+ op0 = XVECEXP (vals, 0, 0);
+ op1 = XVECEXP (vals, 0, 1);
+ use_vec_concat = true;
+ break;
+
+ case V4SFmode:
+ half_mode = V2SFmode;
+ goto half;
+ case V4SImode:
+ half_mode = V2SImode;
+ goto half;
+ half:
+ {
+ rtvec v;
+
+ /* For V4SF and V4SI, we implement a concat of two V2 vectors.
+ Recurse to load the two halves. */
+
+ op0 = gen_reg_rtx (half_mode);
+ v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
+ ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+
+ op1 = gen_reg_rtx (half_mode);
+ v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
+ ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+
+ use_vec_concat = true;
+ }
+ break;
+
+ case V8HImode:
+ case V16QImode:
+ case V4HImode:
+ case V8QImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (use_vec_concat)
+ {
+ if (!register_operand (op0, half_mode))
+ op0 = force_reg (half_mode, op0);
+ if (!register_operand (op1, half_mode))
+ op1 = force_reg (half_mode, op1);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op0, op1)));
+ }
+ else
+ {
+ int i, j, n_elts, n_words, n_elt_per_word;
+ enum machine_mode inner_mode;
+ rtx words[4], shift;
+
+ inner_mode = GET_MODE_INNER (mode);
+ n_elts = GET_MODE_NUNITS (mode);
+ n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+ n_elt_per_word = n_elts / n_words;
+ shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
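+ /* Pack the elements of each word-sized chunk by starting from its
+ highest-indexed element and repeatedly shifting left and ORing in the
+ next lower element, so element 0 lands in the least significant bits,
+ matching the little-endian vector layout. */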
+
+ for (i = 0; i < n_words; ++i)
+ {
+ rtx word = NULL_RTX;
+
+ for (j = 0; j < n_elt_per_word; ++j)
+ {
+ rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
+ elt = convert_modes (word_mode, inner_mode, elt, true);
+
+ if (j == 0)
+ word = elt;
+ else
+ {
+ word = expand_simple_binop (word_mode, ASHIFT, word, shift,
+ word, 1, OPTAB_LIB_WIDEN);
+ word = expand_simple_binop (word_mode, IOR, word, elt,
+ word, 1, OPTAB_LIB_WIDEN);
+ }
+ }
+
+ words[i] = word;
+ }
+
+ if (n_words == 1)
+ emit_move_insn (target, gen_lowpart (mode, words[0]));
+ else if (n_words == 2)
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
+ emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
+ emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
+ emit_move_insn (target, tmp);
+ }
+ else if (n_words == 4)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
+ ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
+ emit_move_insn (target, gen_lowpart (mode, tmp));
+ }
+ else
+ gcc_unreachable ();
+ }
+}
+
+/* Initialize vector TARGET via VALS. Suppress the use of MMX
+ instructions unless MMX_OK is true. */
+
+void
+ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true, all_const_zero = true;
+ int i;
+ rtx x;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ n_var++, one_var = i;
+ else if (x != CONST0_RTX (inner_mode))
+ all_const_zero = false;
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ /* Constants are best loaded from the constant pool. */
+ if (n_var == 0)
+ {
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ return;
+ }
+
+ /* If all values are identical, broadcast the value. */
+ if (all_same
+ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
+ XVECEXP (vals, 0, 0)))
+ return;
+
+ /* Values where only one field is non-constant are best loaded from
+ the pool and overwritten via move later. */
+ if (n_var == 1)
+ {
+ if (all_const_zero
+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
+ XVECEXP (vals, 0, one_var),
+ one_var))
+ return;
+
+ if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
+ return;
+ }
+
+ ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
+}
+
+void
+ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ bool use_vec_merge = false;
+ rtx tmp;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (mmx_ok)
+ {
+ tmp = gen_reg_rtx (GET_MODE_INNER (mode));
+ ix86_expand_vector_extract (true, tmp, target, 1 - elt);
+ if (elt == 0)
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ return;
+ }
+ break;
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case V2DImode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ case V2DFmode:
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ {
+ rtx op0, op1;
+
+ /* For the two element vectors, we implement a VEC_CONCAT with
+ the extraction of the other element. */
+
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
+
+ if (elt == 0)
+ op0 = val, op1 = tmp;
+ else
+ op0 = tmp, op1 = val;
+
+ tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ return;
+
+ case V4SFmode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ switch (elt)
+ {
+ case 0:
+ use_vec_merge = true;
+ break;
+
+ case 1:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* target = A A B B */
+ emit_insn (gen_sse_unpcklps (target, target, target));
+ /* target = X A B B */
+ ix86_expand_vector_set (false, target, val, 0);
+ /* target = A X C D */
+ emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ GEN_INT (1), GEN_INT (0),
+ GEN_INT (2+4), GEN_INT (3+4)));
+ return;
+
+ case 2:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
+ emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ GEN_INT (0), GEN_INT (1),
+ GEN_INT (0+4), GEN_INT (3+4)));
+ return;
+
+ case 3:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B C X */
+ emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ GEN_INT (0), GEN_INT (1),
+ GEN_INT (2+4), GEN_INT (0+4)));
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case V4SImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* Element 0 handled by vec_merge below. */
+ if (elt == 0)
+ {
+ use_vec_merge = true;
+ break;
+ }
+
+ if (TARGET_SSE2)
+ {
+ /* With SSE2, use integer shuffles to swap element 0 and ELT,
+ store into element 0, then shuffle them back. */
+
+ rtx order[4];
+
+ order[0] = GEN_INT (elt);
+ order[1] = const1_rtx;
+ order[2] = const2_rtx;
+ order[3] = GEN_INT (3);
+ order[elt] = const0_rtx;
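+ /* ORDER describes a pshufd permutation that swaps lane 0 with lane ELT
+ and leaves the other lanes alone; since the swap is its own inverse,
+ applying it again after the element-0 store restores the original
+ order. */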
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+
+ ix86_expand_vector_set (false, target, val, 0);
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
+ gen_lowpart (SFmode, val), elt);
+ }
+ return;
+
+ case V8HImode:
+ use_vec_merge = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_merge = TARGET_SSE4_1;
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ case V8QImode:
+ default:
+ break;
+ }
+
+ if (use_vec_merge)
+ {
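+ /* The merge mask (1 << elt) takes lane ELT from the broadcast of VAL
+ and every other lane from TARGET. */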
+ tmp = gen_rtx_VEC_DUPLICATE (mode, val);
+ tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, target);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (tmp, val);
+
+ emit_move_insn (target, mem);
+ }
+}
+
+void
+ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
+{
+ enum machine_mode mode = GET_MODE (vec);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ bool use_vec_extr = false;
+ rtx tmp;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ break;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ use_vec_extr = true;
+ break;
+
+ case V4SFmode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt+4), GEN_INT (elt+4)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_unpckhps (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ break;
+
+ case V4SImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ if (TARGET_SSE2)
+ {
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_pshufd_1 (tmp, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt), GEN_INT (elt)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
+ gen_lowpart (V4SFmode, vec), elt);
+ return;
+ }
+ break;
+
+ case V8HImode:
+ use_vec_extr = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_extr = TARGET_SSE4_1;
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ case V8QImode:
+ /* ??? Could extract the appropriate HImode element and shift. */
+ default:
+ break;
+ }
+
+ if (use_vec_extr)
+ {
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
+
+ /* Let the rtl optimizers know about the zero extension performed. */
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ if (inner_mode == QImode || inner_mode == HImode)
+ {
+ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
+ target = gen_lowpart (SImode, target);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, vec);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (target, tmp);
+ }
+}
+
+/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
+ pattern to reduce; DEST is the destination; IN is the input vector. */
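+/* The sequence combines elements pairwise: MOVHLPS folds the high half of
+ IN onto the low half, FN combines the two halves, a SHUFPS then
+ replicates element 1 of that partial result, and a final FN leaves the
+ fully reduced value in element 0 of DEST. */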
+
+void
+ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+{
+ rtx tmp1, tmp2, tmp3;
+
+ tmp1 = gen_reg_rtx (V4SFmode);
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse_movhlps (tmp1, in, in));
+ emit_insn (fn (tmp2, tmp1, in));
+
+ emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
+ GEN_INT (1), GEN_INT (1),
+ GEN_INT (1+4), GEN_INT (1+4)));
+ emit_insn (fn (dest, tmp2, tmp3));
+}
+
+/* Target hook for scalar_mode_supported_p. */
+static bool
+ix86_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return true;
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+static bool
+ix86_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+ return true;
+ if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
+ return true;
+ if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+ return true;
+ if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+ return true;
+ return false;
+}
+
+/* Worker function for TARGET_MD_ASM_CLOBBERS.
+
+ We do this in the new i386 backend to maintain source compatibility
+ with the old cc0-based compiler. */
+
+static tree
+ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
+ tree inputs ATTRIBUTE_UNUSED,
+ tree clobbers)
+{
+ clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
+ clobbers);
+ clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
+ clobbers);
+ clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
+ clobbers);
+ return clobbers;
+}
+
+/* Return true if this goes in large data/bss. */
+
+static bool
+ix86_in_large_data_p (tree exp)
+{
+ if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
+ return false;
+
+ /* Functions are never large data. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+ if (strcmp (section, ".ldata") == 0
+ || strcmp (section, ".lbss") == 0)
+ return true;
+ return false;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in data because it might be too big when completed. */
+ if (!size || size > ix86_section_threshold)
+ return true;
+ }
+
+ return false;
+}
+static void
+ix86_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == VAR_DECL
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ && ix86_in_large_data_p (decl))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
+}
+
+/* Worker function for REVERSE_CONDITION. */
+
+enum rtx_code
+ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
+{
+ return (mode != CCFPmode && mode != CCFPUmode
+ ? reverse_condition (code)
+ : reverse_condition_maybe_unordered (code));
+}
+
+/* Output code to perform an x87 FP register move, from OPERANDS[1]
+ to OPERANDS[0]. */
+
+const char *
+output_387_reg_move (rtx insn, rtx *operands)
+{
+ if (REG_P (operands[1])
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG)
+ return output_387_ffreep (operands, 0);
+ return "fstp\t%y0";
+ }
+ if (STACK_TOP_P (operands[0]))
+ return "fld%z1\t%y1";
+ return "fst\t%y0";
+}
+
+/* Output code to perform a conditional jump to LABEL, if C2 flag in
+ FP status register is set. */
+
+void
+ix86_emit_fp_unordered_jump (rtx label)
+{
+ rtx reg = gen_reg_rtx (HImode);
+ rtx temp;
+
+ emit_insn (gen_x86_fnstsw_1 (reg));
+
+ if (TARGET_USE_SAHF)
+ {
+ emit_insn (gen_x86_sahf_1 (reg));
+
+ temp = gen_rtx_REG (CCmode, FLAGS_REG);
+ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
+
+ temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
+ }
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+ emit_jump_insn (temp);
+}
+
+/* Output code to perform a log1p XFmode calculation. */
+
+void ix86_emit_i387_log1p (rtx op0, rtx op1)
+{
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+
+ rtx tmp = gen_reg_rtx (XFmode);
+ rtx tmp2 = gen_reg_rtx (XFmode);
+
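+ /* fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2 (about 0.29289), so
+ compare |op1| against that constant: below the threshold compute
+ ln(2) * log2(1 + op1) with fyl2xp1, otherwise fall back to fyl2x on
+ 1 + op1. Either way fldln2 supplies the ln(2) scale factor. */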
+ emit_insn (gen_absxf2 (tmp, op1));
+ emit_insn (gen_cmpxf (tmp,
+ CONST_DOUBLE_FROM_REAL_VALUE (
+ REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
+ XFmode)));
+ emit_jump_insn (gen_bge (label1));
+
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_move_insn (tmp, CONST1_RTX (XFmode));
+ emit_insn (gen_addxf3 (tmp, op1, tmp));
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
+
+ emit_label (label2);
+}
+
+/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
+
+static void
+i386_solaris_elf_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ /* With Binutils 2.15, the "@unwind" marker must be specified on
+ every occurrence of the ".eh_frame" section, not just the first
+ one. */
+ if (TARGET_64BIT
+ && strcmp (name, ".eh_frame") == 0)
+ {
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
+ flags & SECTION_WRITE ? "aw" : "a");
+ return;
+ }
+ default_elf_asm_named_section (name, flags, decl);
+}
+
+/* APPLE LOCAL begin regparmandstackparm */
+
+/* Mark this fndecl as using the regparmandstackparm calling convention. */
+static void
+ix86_make_regparmandstackparmee (tree *pt)
+{
+ decl_attributes (pt,
+ tree_cons (get_identifier ("regparmandstackparmee"),
+ NULL_TREE, TYPE_ATTRIBUTES (*pt)), 0);
+}
+
+/* Look up fndecls marked 'regparmandstackparm' and retrieve their $3SSE equivalents. */
+static splay_tree ix86_darwin_regparmandstackparm_st;
+/* Cache for regparmandstackparm fntypes. */
+static splay_tree ix86_darwin_fntype_st;
+
+/* Append "$3SSE" to an ID, returning a new IDENTIFIER_NODE. */
+static tree
+ix86_darwin_regparmandstackparm_mangle_name (tree id)
+{
+ static const char *mangle_suffix = "$3SSE";
+ unsigned int mangle_length = strlen (mangle_suffix);
+ const char *name;
+ unsigned int orig_length;
+ char *buf;
+
+ if (!id)
+ return NULL_TREE;
+
+ name = IDENTIFIER_POINTER (id);
+ orig_length = strlen (name);
+ buf = alloca (orig_length + mangle_length + 1);
+
+ strcpy (buf, name);
+ strcat (buf, mangle_suffix);
+ return get_identifier (buf); /* Expecting get_identifier to copy the string. */
+}
+
+/* Given the "normal" TRAD_FNDECL marked with 'regparmandstackparm',
+ return a duplicate fndecl marked 'regparmandstackparmee' (note trailing
+ 'ee'). Enter them as a pair in the splay tree ST, if non-null;
+ looking up the TRAD_FNDECL will return the new one. */
+static tree
+ix86_darwin_regparmandstackparm_dup_fndecl (tree trad_fndecl, splay_tree st)
+{
+ tree fntype;
+ tree new_fndecl;
+
+ fntype = TREE_TYPE (trad_fndecl);
+
+ /* NEW_FNDECL will be compiled with the XMM-based calling
+ convention, and TRAD_FNDECL (the original) will be compiled with
+ the traditional stack-based calling convention. */
+ new_fndecl = copy_node (trad_fndecl);
+ DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0;
+ allocate_struct_function (new_fndecl);
+ DECL_STRUCT_FUNCTION (new_fndecl)->function_end_locus
+ = DECL_STRUCT_FUNCTION (trad_fndecl)->function_end_locus;
+ DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl =
+ DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl;
+ DECL_RESULT (new_fndecl) = copy_node (DECL_RESULT (trad_fndecl));
+ DECL_CONTEXT (DECL_RESULT (new_fndecl)) = new_fndecl;
+ SET_DECL_ASSEMBLER_NAME (new_fndecl, 0);
+ DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl));
+ TYPE_ATTRIBUTES (TREE_TYPE (new_fndecl))
+ = copy_list (TYPE_ATTRIBUTES (TREE_TYPE (trad_fndecl)));
+ ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl));
+ /* Kludge: block copied from tree-inline.c(save_body). Should
+ be refactored into a common shareable routine. */
+ {
+ tree *parg;
+
+ for (parg = &DECL_ARGUMENTS (new_fndecl);
+ *parg;
+ parg = &TREE_CHAIN (*parg))
+ {
+ tree new = copy_node (*parg);
+
+ lang_hooks.dup_lang_specific_decl (new);
+ DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (*parg);
+ DECL_CONTEXT (new) = new_fndecl;
+ /* Note: it may be possible to move the original parameters
+ with the function body, making this splay tree
+ unnecessary. */
+ if (st)
+ splay_tree_insert (st, (splay_tree_key) *parg, (splay_tree_value) new);
+ TREE_CHAIN (new) = TREE_CHAIN (*parg);
+ *parg = new;
+ }
+
+ if (DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl)
+ {
+ tree old = DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl;
+ tree new = copy_node (old);
+
+ lang_hooks.dup_lang_specific_decl (new);
+ DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (old);
+ DECL_CONTEXT (new) = new_fndecl;
+ if (st)
+ splay_tree_insert (st, (splay_tree_key) old, (splay_tree_value) new);
+ TREE_CHAIN (new) = TREE_CHAIN (old);
+ DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = new;
+ }
+
+ if (st)
+ splay_tree_insert (st, (splay_tree_key) DECL_RESULT (trad_fndecl),
+ (splay_tree_value) DECL_RESULT (new_fndecl));
+ }
+#if 0
+ /* Testing Kludge: If TREE_READONLY is set, cgen can and
+ occasionally will delete "pure" (no side-effect) calls to a
+ library function. Cleared here to preclude this when
+ test-building libraries. */
+ TREE_READONLY (new_fndecl) = false;
+#endif
+
+ return new_fndecl;
+}
+
+/* FNDECL has no body, but user has marked it as a regparmandstackparm
+ item. Create a corresponding regparmandstackparm decl for it, and
+ arrange for calls to be redirected to the regparmandstackparm
+ version. */
+static tree
+ix86_darwin_regparmandstackparm_extern_decl (tree trad_fndecl)
+{
+ tree new_fndecl;
+
+ /* new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, (splay_tree)0); */
+ new_fndecl = copy_node (trad_fndecl);
+ DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl));
+ DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0;
+ SET_DECL_ASSEMBLER_NAME (new_fndecl, 0);
+ ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl));
+ cgraph_finalize_function (new_fndecl, /* nested = */ true);
+ if (!ix86_darwin_regparmandstackparm_st)
+ ix86_darwin_regparmandstackparm_st
+ = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ splay_tree_insert (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl);
+ return new_fndecl;
+}
+
+/* Invoked after all functions have been seen and digested, but before
+ any inlining decisions have been made. Walk the callgraph, seeking
+ calls to functions that have regparmandstackparm variants. Rewrite the
+ calls, directing them to the new 'regparmandstackparmee' versions. */
+void
+ix86_darwin_redirect_calls(void)
+{
+ struct cgraph_node *fastcall_node, *node;
+ struct cgraph_edge *edge, *next_edge;
+ tree addr, fastcall_decl, orig_fntype;
+ splay_tree_node call_stn, type_stn;
+
+ if (!flag_unit_at_a_time)
+ return;
+
+ if (!ix86_darwin_fntype_st)
+ ix86_darwin_fntype_st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+
+ if (!ix86_darwin_regparmandstackparm_st)
+ ix86_darwin_regparmandstackparm_st
+ = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+
+ /* Extern decls marked "regparmandstackparm" beget regparmandstackparmee
+ decls. */
+ for (node = cgraph_nodes; node; node = node->next)
+ if (!DECL_SAVED_TREE (node->decl)
+ && lookup_attribute ("regparmandstackparm",
+ TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
+ && !lookup_attribute ("regparmandstackparmee",
+ TYPE_ATTRIBUTES (TREE_TYPE (node->decl))))
+ {
+ fastcall_decl = ix86_darwin_regparmandstackparm_extern_decl (node->decl);
+ splay_tree_insert (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key) node->decl,
+ (splay_tree_value) fastcall_decl);
+ }
+
+ /* Walk the callgraph, rewriting calls as we go. */
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ call_stn = splay_tree_lookup (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key)node->decl);
+ /* If this function was in our splay-tree, we previously created
+ a regparmandstackparm version of it. */
+ if (call_stn)
+ {
+ fastcall_decl = (tree)call_stn->value;
+ fastcall_node = cgraph_node (fastcall_decl);
+ /* Redirect all calls to this fn to the regparmandstackparm
+ version. */
+ for (edge = next_edge = node->callers ; edge ; edge = next_edge)
+ {
+ tree call, stmt;
+ next_edge = next_edge->next_caller;
+ cgraph_redirect_edge_callee (edge, fastcall_node);
+ /* APPLE LOCAL */
+ /* MERGE FIXME call_expr -> call_stmt */
+ stmt = edge->call_stmt;
+ call = get_call_expr_in (stmt);
+ addr = TREE_OPERAND (call, 0);
+ TREE_OPERAND (addr, 0) = fastcall_decl;
+ orig_fntype = TREE_TYPE (addr);
+ /* Likewise, revise the TYPE of the ADDR node between
+ the CALL_EXPR and the FNDECL. This type determines
+ the parameters and calling convention applied to this
+ CALL_EXPR. */
+ type_stn = splay_tree_lookup (ix86_darwin_fntype_st, (splay_tree_value)orig_fntype);
+ if (type_stn)
+ TREE_TYPE (addr) = (tree)type_stn->value;
+ else
+ {
+ ix86_make_regparmandstackparmee (&TREE_TYPE (addr));
+ splay_tree_insert (ix86_darwin_fntype_st,
+ (splay_tree_key)orig_fntype,
+ (splay_tree_value)TREE_TYPE (addr));
+ }
+ }
+ }
+ }
+}
+
+/* Information necessary to re-context a function body. */
+typedef struct {
+ tree old_context;
+ tree new_context;
+ splay_tree decl_map;
+} recontext_data;
+
+/* Visit every node of a function body; if it points at the
+ OLD_CONTEXT, re-direct it to the NEW_CONTEXT. Invoked via
+ walk_tree. DECL_MAP is a splay tree that maps the original
+ parameters to new ones. */
+static tree
+ix86_darwin_re_context_1 (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data ATTRIBUTE_UNUSED)
+{
+ tree t;
+ recontext_data *rcd;
+ enum tree_code_class class;
+ splay_tree_node n;
+
+ if (!tp)
+ return NULL_TREE;
+
+ t = *tp;
+ if (!t)
+ return NULL_TREE;
+
+ rcd = (recontext_data *)data;
+ n = splay_tree_lookup (rcd->decl_map, (splay_tree_key) t);
+ if (n)
+ {
+ *tp = (tree)n->value;
+ return NULL_TREE;
+ }
+
+ class = TREE_CODE_CLASS (TREE_CODE (t));
+ if (class != tcc_declaration)
+ return NULL_TREE;
+
+ if (DECL_CONTEXT (t) == rcd->old_context)
+ DECL_CONTEXT (t) = rcd->new_context;
+
+ return NULL_TREE;
+}
+
+/* Walk a function body, updating every pointer to OLD_CONTEXT to
+ NEW_CONTEXT. TP is the top of the function body, and ST is a splay
+ tree of replacements for the parameters. */
+static tree
+ix86_darwin_re_context (tree *tp, tree old_context, tree new_context, splay_tree st)
+{
+ recontext_data rcd;
+ tree ret;
+
+ rcd.old_context = old_context;
+ rcd.new_context = new_context;
+ rcd.decl_map = st;
+
+ ret = walk_tree (tp, ix86_darwin_re_context_1,
+ (void *)&rcd, (struct pointer_set_t *)0);
+ return ret;
+}
+
+/* Given TRAD_FNDECL, create a regparmandstackparm variant and hang the
+ DECL_SAVED_TREE body there. Create a new, one-statement body for
+ TRAD_FNDECL that calls the new one. If the return types are
+ compatible (e.g. non-FP), the call can usually be sibcalled. The
+ inliner will often copy the body from NEW_FNDECL into TRAD_FNDECL,
+ and we do nothing to prevent this. */
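+/* A rough sketch of the intended effect (the declaration syntax here is
+ illustrative): given a definition such as
+ void __attribute__((regparmandstackparm)) f (float x) { ... }
+ the original 'f' keeps the stack-based convention and is reduced to a
+ wrapper that simply returns f$3SSE (x), while the body moves to the
+ 'regparmandstackparmee'-marked clone 'f$3SSE'. */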
+static void
+ix86_darwin_regparmandstackparm_wrapper (tree trad_fndecl)
+{
+ tree new_fndecl;
+ splay_tree st;
+ tree bind, block, call, clone_parm, modify, parmlist, rdecl, rtn, stmt_list, type;
+ tree_stmt_iterator tsi;
+
+ st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, st);
+
+ for (parmlist = NULL, clone_parm = DECL_ARGUMENTS (trad_fndecl);
+ clone_parm;
+ clone_parm = TREE_CHAIN (clone_parm))
+ {
+ gcc_assert (clone_parm);
+ DECL_ABSTRACT_ORIGIN (clone_parm) = NULL;
+ parmlist = tree_cons (NULL, clone_parm, parmlist);
+ }
+
+ /* We built this list backwards; fix now. */
+ parmlist = nreverse (parmlist);
+ type = TREE_TYPE (TREE_TYPE (trad_fndecl));
+ call = build_function_call (new_fndecl, parmlist);
+ TREE_TYPE (call) = type;
+ if (type == void_type_node)
+ rtn = call;
+ else if (0 && ix86_return_in_memory (type))
+ {
+ /* Return without a RESULT_DECL: RETURN_EXPR (CALL). */
+ rtn = make_node (RETURN_EXPR);
+ TREE_OPERAND (rtn, 0) = call;
+ TREE_TYPE (rtn) = type;
+ }
+ else /* RETURN_EXPR(MODIFY(RESULT_DECL, CALL)). */
+ {
+ rdecl = make_node (RESULT_DECL);
+ TREE_TYPE (rdecl) = type;
+ DECL_MODE (rdecl) = TYPE_MODE (type);
+ DECL_RESULT (trad_fndecl) = rdecl;
+ DECL_CONTEXT (rdecl) = trad_fndecl;
+ modify = build_modify_expr (rdecl, NOP_EXPR, call);
+ TREE_TYPE (modify) = type;
+ rtn = make_node (RETURN_EXPR);
+ TREE_OPERAND (rtn, 0) = modify;
+ TREE_TYPE (rtn) = type;
+ }
+ stmt_list = alloc_stmt_list ();
+ tsi = tsi_start (stmt_list);
+ tsi_link_after (&tsi, rtn, TSI_NEW_STMT);
+
+ /* This wrapper consists of "return <my_name>$3SSE (<my_arguments>);",
+ and thus it has no local variables. */
+ block = make_node (BLOCK);
+ TREE_USED (block) = true;
+ bind = make_node (BIND_EXPR);
+ BIND_EXPR_BLOCK (bind) = block;
+ BIND_EXPR_BODY (bind) = stmt_list;
+ TREE_TYPE (bind) = void_type_node;
+ TREE_SIDE_EFFECTS (bind) = true;
+
+ DECL_SAVED_TREE (trad_fndecl) = bind;
+
+ /* DECL_ABSTRACT_ORIGIN (new_fndecl) = NULL; *//* ? */
+
+ ix86_darwin_re_context (&new_fndecl, trad_fndecl, new_fndecl, st);
+ ix86_darwin_re_context (&DECL_SAVED_TREE (new_fndecl), trad_fndecl, new_fndecl, st);
+ splay_tree_delete (st);
+ gimplify_function_tree (new_fndecl);
+ cgraph_finalize_function (new_fndecl, /* nested = */ true);
+ gimplify_function_tree (trad_fndecl);
+ if (!ix86_darwin_regparmandstackparm_st)
+ ix86_darwin_regparmandstackparm_st
+ = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ splay_tree_insert (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl);
+}
+
+/* Entry point into the regparmandstackparm stuff. FNDECL might be marked
+ 'regparmandstackparm'; if it is, create the fast version, &etc. */
+void
+ix86_darwin_handle_regparmandstackparm (tree fndecl)
+{
+ static unsigned int already_running = 0;
+
+ /* We don't support variable-argument functions yet. */
+ if (!fndecl || already_running)
+ return;
+
+ already_running++;
+
+ if (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
+ && !lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ {
+ if (DECL_STRUCT_FUNCTION (fndecl) && DECL_STRUCT_FUNCTION (fndecl)->stdarg)
+ error ("regparmandstackparm is incompatible with varargs");
+ else if (DECL_SAVED_TREE (fndecl))
+ ix86_darwin_regparmandstackparm_wrapper (fndecl);
+ }
+
+ already_running--;
+}
+/* APPLE LOCAL end regparmandstackparm */
+
+/* APPLE LOCAL begin CW asm blocks */
+#include <ctype.h>
+#include "config/asm.h"
+
+/* Additional register names accepted for inline assembly that would
+ otherwise not be registers. This table must be sorted for
+ bsearch. */
+static const char *iasm_additional_names[] = {
+ "AH", "AL", "AX", "BH", "BL", "BP", "BX", "CH", "CL", "CX", "DH",
+ "DI", "DL", "DX", "EAX", "EBP", "EBX", "ECX", "EDI", "EDX", "ESI",
+ "ESP", "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "R10",
+ "R11", "R12", "R13", "R14", "R15", "R8", "R9", "RAX", "RBP", "RBX",
+ "RCX", "RDI", "RDX", "RSI", "RSP", "SI", "SP", "ST", "ST(1)", "ST(2)",
+ "ST(3)", "ST(4)", "ST(5)", "ST(6)", "ST(7)", "XMM0", "XMM1", "XMM10",
+ "XMM11", "XMM12", "XMM13", "XMM14", "XMM15", "XMM2", "XMM3", "XMM4",
+ "XMM5", "XMM6", "XMM7", "XMM8", "XMM9" };
+
+/* Comparison function for bsearch to find additional register names. */
+static int
+iasm_reg_comp (const void *a, const void *b)
+{
+ char *const*x = a;
+ char *const*y = b;
+ int c = strcasecmp (*x, *y);
+ return c;
+}
+
+/* Translate some register names seen in CW asm into GCC standard
+ forms. */
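+/* For example, a CW-style spelling such as "EAX" that decode_reg_name
+ does not accept is found in the table above, lowercased to "eax", and
+ returned as "%eax" when emitting AT&T syntax. */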
+
+const char *
+i386_iasm_register_name (const char *regname, char *buf)
+{
+ const char **r;
+
+ /* If we can find the named register, return it. */
+ if (decode_reg_name (regname) >= 0)
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return regname;
+ sprintf (buf, "%%%s", regname);
+ return buf;
+ }
+
+ /* If we can find a lower-case version of the register in
+ additional_names, return it. */
+ r = bsearch (&regname, iasm_additional_names,
+ sizeof (iasm_additional_names) / sizeof (iasm_additional_names[0]),
+ sizeof (iasm_additional_names[0]), iasm_reg_comp);
+ if (r)
+ {
+ char *p;
+ const char *q;
+ q = regname = *r;
+ p = buf;
+ if (ASSEMBLER_DIALECT != ASM_INTEL)
+ *p++ = '%';
+ regname = p;
+ while ((*p++ = tolower (*q++)))
+ ;
+ if (decode_reg_name (regname) >= 0)
+ return buf;
+ }
+
+ return NULL;
+}
+
+/* Return true iff the opcode wants memory to be stable. We arrange
+ for a memory clobber in these instances. */
+bool
+iasm_memory_clobber (const char *ARG_UNUSED (opcode))
+{
+ return true;
+}
+
+/* Return true iff the operands need swapping. */
+
+bool
+iasm_x86_needs_swapping (const char *opcode)
+{
+ /* Don't swap if output format is the same as input format. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return false;
+
+ /* These don't need swapping. */
+ if (strcasecmp (opcode, "bound") == 0)
+ return false;
+ if (strcasecmp (opcode, "invlpga") == 0)
+ return false;
+ if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1))
+ return false;
+
+ return true;
+}
+
+/* Swap operands, given in MS-style asm ordering when the output style
+ is in ATT syntax. */
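+/* For example, the MS-style "mov eax, 4" (destination first) must be
+ emitted as "mov $4, %eax" in AT&T syntax (source first), so two- and
+ three-operand lists are reversed. */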
+
+static tree
+iasm_x86_swap_operands (const char *opcode, tree args)
+{
+ int noperands;
+
+ if (iasm_x86_needs_swapping (opcode) == false)
+ return args;
+
+#if 0
+ /* GAS also checks the type of the arguments to determine if they
+ need swapping. */
+ if ((argtype[0]&Imm) && (argtype[1]&Imm))
+ return args;
+#endif
+ noperands = list_length (args);
+ if (noperands == 2 || noperands == 3)
+ {
+ /* Swap first and last (1 and 2 or 1 and 3). */
+ return nreverse (args);
+ }
+ return args;
+}
+
+/* Map a register name to a high level tree type for a VAR_DECL of
+ that type, whose RTL will refer to the given register. */
+
+static tree
+iasm_type_for (tree arg)
+{
+ tree type = NULL_TREE;
+
+ if (IDENTIFIER_LENGTH (arg) > 2
+ && IDENTIFIER_POINTER (arg)[0] == '%')
+ {
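+ /* Classify by spelling: "%e.." is SImode, a third character of 'l'
+ gives QImode, a third character of 'x' gives HImode, "%r.." is
+ DImode, and "%x.."/"%m.." (XMM and MMX) are treated as SFmode. */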
+ enum machine_mode mode = VOIDmode;
+ if (IDENTIFIER_POINTER (arg)[1] == 'e')
+ mode = SImode;
+ else if (/* IDENTIFIER_POINTER (arg)[2] == 'h'
+ || */ IDENTIFIER_POINTER (arg)[2] == 'l')
+ mode = QImode;
+ else if (IDENTIFIER_POINTER (arg)[2] == 'x')
+ mode = HImode;
+ else if (IDENTIFIER_POINTER (arg)[1] == 'r')
+ mode = DImode;
+ else if (IDENTIFIER_POINTER (arg)[1] == 'x')
+ mode = SFmode;
+ else if (IDENTIFIER_POINTER (arg)[1] == 'm')
+ mode = SFmode;
+
+ if (mode != VOIDmode)
+ type = lang_hooks.types.type_for_mode (mode, 1);
+ }
+
+ return type;
+}
+
+/* We raise a named hard register into a VAR_DECL of an appropriate
+ type that refers to the register, so that reload doesn't run out of
+ registers. */
+
+tree
+iasm_raise_reg (tree arg)
+{
+ int regno = decode_reg_name (IDENTIFIER_POINTER (arg));
+ if (regno >= 0)
+ {
+ tree decl = NULL_TREE;
+
+ decl = lookup_name (arg);
+ if (decl == error_mark_node)
+ decl = 0;
+ if (decl == 0)
+ {
+ tree type = iasm_type_for (arg);
+ if (type)
+ {
+ decl = build_decl (VAR_DECL, arg, type);
+ DECL_ARTIFICIAL (decl) = 1;
+ DECL_REGISTER (decl) = 1;
+ C_DECL_REGISTER (decl) = 1;
+ DECL_HARD_REGISTER (decl) = 1;
+ set_user_assembler_name (decl, IDENTIFIER_POINTER (arg));
+ decl = lang_hooks.decls.pushdecl (decl);
+ }
+ }
+
+ if (decl)
+ return decl;
+ }
+
+ return arg;
+}
+
+/* Allow constants and readonly variables to be used in instructions
+ in places that require constants. */
+
+static tree
+iasm_default_conv (tree e)
+{
+ if (e == NULL_TREE)
+ return e;
+
+ if (TREE_CODE (e) == CONST_DECL)
+ e = DECL_INITIAL (e);
+
+ if (DECL_P (e) && DECL_MODE (e) != BLKmode)
+ e = decl_constant_value (e);
+ return e;
+}
+
+/* Return true iff the operand is suitable as the offset for a
+ memory instruction. */
+
+static bool
+iasm_is_offset (tree v)
+{
+ if (TREE_CODE (v) == INTEGER_CST)
+ return true;
+ if (TREE_CODE (v) == ADDR_EXPR)
+ {
+ v = TREE_OPERAND (v, 0);
+ if (TREE_CODE (v) == VAR_DECL
+ && TREE_STATIC (v)
+ && MEM_P (DECL_RTL (v)))
+ {
+ note_alternative_entry_points ();
+ return true;
+ }
+ if (TREE_CODE (v) == LABEL_DECL)
+ return true;
+ return false;
+ }
+ if (TREE_CODE (v) == VAR_DECL
+ && TREE_STATIC (v)
+ && MEM_P (DECL_RTL (v)))
+ {
+ note_alternative_entry_points ();
+ return true;
+ }
+ if ((TREE_CODE (v) == MINUS_EXPR
+ || TREE_CODE (v) == PLUS_EXPR)
+ && iasm_is_offset (TREE_OPERAND (v, 0))
+ && iasm_is_offset (TREE_OPERAND (v, 1)))
+ return true;
+ if (TREE_CODE (v) == NEGATE_EXPR
+ && iasm_is_offset (TREE_OPERAND (v, 0)))
+ return true;
+
+ return false;
+}
+
+/* Combine two types for [] expressions. */
+
+static tree
+iasm_combine_type (tree type0, tree type1)
+{
+ if (type0 == void_type_node
+ || type0 == NULL_TREE)
+ {
+ if (type1 == void_type_node)
+ return NULL_TREE;
+ return type1;
+ }
+
+ if (type1 == void_type_node
+ || type1 == NULL_TREE)
+ return type0;
+
+ if (type0 == type1)
+ return type0;
+
+ error ("too many types in []");
+
+ return type0;
+}
+
+/* We canonicalize the input form of bracket expressions, as the input
+ forms are less constrained than what the assembler will accept.
+
+ TOP is the top of the canonical tree we're generating and
+ TREE_OPERAND (, 0) is the offset portion of the expression. ARGP
+ points to the current part of the tree we're walking.
+
+ The transformations we do:
+
+ (A+O) ==> A
+ (A-O) ==> A
+ (O+A) ==> A
+
+ where O are offset expressions. */
+
+static tree
+iasm_canonicalize_bracket_1 (tree* argp, tree top)
+{
+ tree arg = *argp;
+ tree offset = TREE_OPERAND (top, 0);
+ tree arg0, arg1;
+ tree rtype = NULL_TREE;
+
+ *argp = arg = iasm_default_conv (arg);
+
+ switch (TREE_CODE (arg))
+ {
+ case NOP_EXPR:
+ if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE)
+ {
+ *argp = TREE_OPERAND (arg, 0);
+ return TREE_TYPE (arg);
+ }
+ break;
+
+ case BRACKET_EXPR:
+ rtype = TREE_TYPE (arg);
+ /* fall thru */
+ case PLUS_EXPR:
+ arg0 = TREE_OPERAND (arg, 0);
+ arg1 = TREE_OPERAND (arg, 1);
+
+ arg0 = iasm_default_conv (arg0);
+ arg1 = iasm_default_conv (arg1);
+
+ if (iasm_is_offset (arg0))
+ {
+ if (offset != integer_zero_node)
+ arg0 = build2 (PLUS_EXPR, void_type_node, arg0, offset);
+ TREE_OPERAND (top, 0) = arg0;
+
+ *argp = arg1;
+ if (arg1)
+ return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
+ }
+ else if (arg1 && iasm_is_offset (arg1))
+ {
+ if (offset != integer_zero_node)
+ arg1 = build2 (PLUS_EXPR, void_type_node, arg1, offset);
+ TREE_OPERAND (top, 0) = arg1;
+ *argp = arg0;
+ return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
+ }
+ else
+ {
+ rtype = iasm_combine_type (rtype,
+ iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top));
+
+ if (arg1)
+ rtype = iasm_combine_type (rtype,
+ iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), top));
+ if (TREE_OPERAND (arg, 0) == NULL_TREE)
+ {
+ if (TREE_OPERAND (arg, 1))
+ {
+ TREE_OPERAND (arg, 0) = TREE_OPERAND (arg, 1);
+ TREE_OPERAND (arg, 1) = NULL_TREE;
+ }
+ else
+ *argp = NULL_TREE;
+ }
+ else if (TREE_OPERAND (arg, 1) == NULL_TREE && rtype == NULL_TREE)
+ *argp = TREE_OPERAND (arg, 0);
+ if (TREE_CODE (arg) == PLUS_EXPR
+ && TREE_TYPE (arg) == NULL_TREE
+ && TREE_TYPE (TREE_OPERAND (arg, 0))
+ && TREE_TYPE (TREE_OPERAND (arg, 1))
+ && (POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1)))
+ || POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))))
+ {
+ tree type = TREE_TYPE (TREE_OPERAND (arg, 1));
+ if (INTEGRAL_TYPE_P (type))
+ type = TREE_TYPE (TREE_OPERAND (arg, 0));
+ TREE_TYPE (arg) = type;
+ }
+ if (TREE_CODE (arg) == PLUS_EXPR
+ && TREE_TYPE (arg) == NULL_TREE
+ && TREE_TYPE (TREE_OPERAND (arg, 0))
+ && TREE_TYPE (TREE_OPERAND (arg, 0)) == TREE_TYPE (TREE_OPERAND (arg, 1)))
+ {
+ tree type = TREE_TYPE (TREE_OPERAND (arg, 0));
+ TREE_TYPE (arg) = type;
+ }
+ }
+ return rtype;
+
+ case MINUS_EXPR:
+ rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top);
+ arg0 = TREE_OPERAND (arg, 0);
+ arg1 = TREE_OPERAND (arg, 1);
+ arg1 = iasm_default_conv (arg1);
+ if (iasm_is_offset (arg1))
+ {
+ offset = TREE_OPERAND (top, 0);
+ if (offset == integer_zero_node)
+ arg1 = fold (build1 (NEGATE_EXPR,
+ TREE_TYPE (arg1),
+ arg1));
+ else
+ arg1 = build2 (MINUS_EXPR, void_type_node, offset, arg1);
+ TREE_OPERAND (top, 0) = arg1;
+ *argp = arg0;
+ return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));;
+ }
+ return rtype;
+
+ case PARM_DECL:
+ case VAR_DECL:
+ {
+ *argp = iasm_addr (arg);
+ break;
+ }
+
+ case IDENTIFIER_NODE:
+ {
+ *argp = iasm_raise_reg (arg);
+ break;
+ }
+
+ case MULT_EXPR:
+ if (TREE_TYPE (arg) == NULL_TREE)
+ {
+ if (TREE_CODE (TREE_OPERAND (arg, 1)) == IDENTIFIER_NODE)
+ TREE_OPERAND (arg, 1) = iasm_raise_reg (TREE_OPERAND (arg, 1));
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE)
+ TREE_OPERAND (arg, 0) = iasm_raise_reg (TREE_OPERAND (arg, 0));
+ if (TREE_TYPE (TREE_OPERAND (arg, 0))
+ && TREE_TYPE (TREE_OPERAND (arg, 1)))
+ TREE_TYPE (arg) = TREE_TYPE (TREE_OPERAND (arg, 0));
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+/* Form an indirection for an inline asm address expression operand.
+ We give a warning when we think the optimizer might have to be used
+ to reform complex addresses, &stack_var + %eax + 4 for example,
+ after gimplification rips the address apart. */
+
+static tree
+iasm_indirect (tree addr)
+{
+ if (TREE_CODE (addr) == ADDR_EXPR
+ && TREE_CODE (TREE_TYPE (TREE_OPERAND (addr, 0))) != ARRAY_TYPE
+ /* && TREE_CODE (TREE_OPERAND (addr, 0)) == ARRAY_REF */)
+ return TREE_OPERAND (addr, 0);
+
+ addr = fold (build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr));
+
+ if (! optimize && TREE_CODE (addr) == INDIRECT_REF)
+ warning (0, "addressing mode too complex when not optimizing, will consume extra register(s)");
+
+ return addr;
+}
+
+/* Form an address addition for an inline asm address expression. We
+ try to form ARRAY_REFs, as they will go through gimplification
+ without being ripped apart. */
+
+static tree
+iasm_add (tree addr, tree off)
+{
+ if (integer_zerop (off))
+ return addr;
+
+ /* We have to convert the offset to an int type, as we rip apart
+ trees whose type has been converted to a pointer type for the
+ offset already. */
+ return pointer_int_sum (PLUS_EXPR, addr, convert (integer_type_node, off));
+}
+
+/* We canonicalize the input form of bracket expressions, as the input
+ forms are less constrained than what the assembler will accept. */
+
+static tree
+iasm_canonicalize_bracket (tree arg)
+{
+ tree rtype;
+
+ gcc_assert (TREE_CODE (arg) == BRACKET_EXPR);
+
+ /* Let the normal operand printer output this without trying to
+ decompose it into parts so that things like (%esp + 20) + 4 can
+ be output as 24(%esp) by the optimizer instead of 4(%0) and
+ burning an "R" with (%esp + 20). */
+ if (TREE_OPERAND (arg, 1) == NULL_TREE
+ && TREE_TYPE (TREE_OPERAND (arg, 0))
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0))))
+ {
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL
+ || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL)
+ return arg;
+ return iasm_indirect (TREE_OPERAND (arg, 0));
+ }
+
+ /* Ensure that 0 is an offset */
+ if (TREE_OPERAND (arg, 0)
+ && iasm_is_offset (TREE_OPERAND (arg, 0)))
+ {
+ /* we win if 0 is an offset already. */
+ }
+ else if (TREE_OPERAND (arg, 1) == NULL_TREE)
+ {
+ /* Move 0 to 1, if 1 is empty and 0 isn't already an offset */
+ TREE_OPERAND (arg, 1) = TREE_OPERAND (arg, 0);
+ TREE_OPERAND (arg, 0) = integer_zero_node;
+ }
+ else
+ {
+ tree swp;
+ /* Just have to force it now */
+ swp = iasm_build_bracket (TREE_OPERAND (arg, 0), TREE_OPERAND (arg, 1));
+ TREE_OPERAND (arg, 0) = integer_zero_node;
+ TREE_OPERAND (arg, 1) = swp;
+ }
+
+ if (TREE_OPERAND (arg, 1))
+ {
+ rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), arg);
+ if (rtype)
+ TREE_TYPE (arg) = iasm_combine_type (TREE_TYPE (arg), rtype);
+ }
+
+ /* For correctness, pointer types should be raised to the tree
+ level, as they denote address calculations with stack-based
+ objects, and we want print_operand to print the entire address so
+ that it can combine constants and hard registers into the address.
+ Unfortunately we might have to rely upon the optimizer to reform
+ the address after the gimplification pass rips it apart. */
+
+ /* Handle [INTEGER_CST][ptr][op3] */
+ if (TREE_OPERAND (arg, 1)
+ && TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST
+ && TREE_CODE (TREE_OPERAND (arg, 1)) == BRACKET_EXPR
+ && TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)))
+ && TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) != void_type_node
+ && (TREE_TYPE (arg) == void_type_node
+ || (TREE_TYPE (arg) == get_identifier ("word")
+ && (TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))))
+ == HImode))))
+ {
+ tree op3 = TREE_OPERAND (TREE_OPERAND (arg, 1), 1);
+ tree addr = iasm_add (TREE_OPERAND (TREE_OPERAND (arg, 1), 0),
+ TREE_OPERAND (arg, 0));
+ tree type;
+ addr = iasm_indirect (addr);
+ if (op3 == NULL_TREE)
+ return addr;
+ type = TREE_TYPE (addr);
+ type = build_pointer_type (type);
+ addr = build1 (ADDR_EXPR, type, addr);
+ addr = fold (build2 (PLUS_EXPR, type, addr, op3));
+ return iasm_indirect (addr);
+ }
+
+ /* Handle ptr + INTEGER_CST */
+ if (TREE_OPERAND (arg, 1)
+ && TREE_TYPE (arg) == void_type_node
+ && TREE_TYPE (TREE_OPERAND (arg, 1))
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1)))
+ && TREE_TYPE (TREE_TYPE (TREE_OPERAND (arg, 1))) != void_type_node)
+ {
+ if (TREE_CODE (TREE_OPERAND (arg, 1)) == ADDR_EXPR)
+ {
+ if (TREE_OPERAND (arg, 0) == integer_zero_node)
+ return TREE_OPERAND (TREE_OPERAND (arg, 1), 0);
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST)
+ return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0)));
+ }
+ if (TREE_CODE (TREE_OPERAND (arg, 1)) == PLUS_EXPR)
+ {
+ if (TREE_OPERAND (arg, 0) == integer_zero_node)
+ return iasm_indirect (TREE_OPERAND (arg, 1));
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST)
+ return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0)));
+ }
+ }
+ return arg;
+}
+
+/* We canonicalize the instruction by swapping operands and rewriting
+ the opcode if the output style is in ATT syntax. */
+
+tree
+iasm_x86_canonicalize_operands (const char **opcode_p, tree iargs, void *ep)
+{
+ iasm_md_extra_info *e = ep;
+ static char buf[40];
+ tree args = iargs;
+ int argnum = 1;
+ const char *opcode = *opcode_p;
+ bool fp_style = false;
+ bool fpi_style = false;
+
+ /* Don't transform if output format is the same as input format. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return iargs;
+
+ if (strncasecmp (opcode, "f", 1) == 0)
+ fp_style = true;
+
+ if (fp_style
+ && strncasecmp (opcode+1, "i", 1) == 0)
+ fpi_style = true;
+
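+ /* The single letters recorded in e->mod[] below describe each operand's
+ size in AT&T-suffix style ('b', 'w', 'l', 'q' for integer widths and
+ 's', 'l', 't' for float widths); they are adjusted per opcode further
+ down. */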
+ while (args)
+ {
+ tree arg = TREE_VALUE (args);
+
+ /* Handle st(3) */
+ if (TREE_CODE (arg) == COMPOUND_EXPR
+ && TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE
+ && strcasecmp (IDENTIFIER_POINTER (TREE_OPERAND (arg, 0)), "%st") == 0
+ && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST)
+ {
+ int v = tree_low_cst (TREE_OPERAND (arg, 1), 0);
+
+ if (v < 0 || v > 7)
+ {
+ error ("unknown floating point register st(%d)", v);
+ v = 0;
+ }
+
+ /* Rewrite %st(0) to %st. */
+ if (v == 0)
+ TREE_VALUE (args) = TREE_OPERAND (arg, 0);
+ else
+ {
+ char buf[20];
+ sprintf (buf, "%%st(%d)", v);
+ TREE_VALUE (args) = get_identifier (buf);
+ }
+ }
+ else if (TREE_CODE (arg) == BRACKET_EXPR)
+ TREE_VALUE (args) = arg = iasm_canonicalize_bracket (arg);
+
+ switch (TREE_CODE (arg))
+ {
+ case ARRAY_REF:
+ case VAR_DECL:
+ case PARM_DECL:
+ case INDIRECT_REF:
+ if (TYPE_MODE (TREE_TYPE (arg)) == QImode)
+ e->mod[argnum-1] = 'b';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == HImode)
+ e->mod[argnum-1] = fpi_style ? 's' : 'w';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == SImode)
+ e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l');
+ else if (TYPE_MODE (TREE_TYPE (arg)) == DImode)
+ e->mod[argnum-1] = 'q';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == SFmode)
+ e->mod[argnum-1] = 's';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == DFmode)
+ e->mod[argnum-1] = 'l';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == XFmode)
+ e->mod[argnum-1] = 't';
+ break;
+ case BRACKET_EXPR:
+ /* We use the TREE_TYPE to indicate the type of the operand; it
+ is set with code like: inc dword ptr [eax]. */
+ if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE)
+ {
+ const char *s = IDENTIFIER_POINTER (TREE_TYPE (arg));
+ if (strcasecmp (s, "byte") == 0)
+ e->mod[argnum-1] = 'b';
+ else if (strcasecmp (s, "word") == 0)
+ e->mod[argnum-1] = fpi_style ? 's' : 'w';
+ else if (strcasecmp (s, "dword") == 0)
+ e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l');
+ else if (strcasecmp (s, "qword") == 0)
+ e->mod[argnum-1] = 'q';
+ else if (strcasecmp (s, "real4") == 0)
+ e->mod[argnum-1] = 's';
+ else if (strcasecmp (s, "real8") == 0)
+ e->mod[argnum-1] = 'l';
+ else if (strcasecmp (s, "real10") == 0)
+ e->mod[argnum-1] = 't';
+ else if (strcasecmp (s, "tbyte") == 0)
+ e->mod[argnum-1] = 't';
+ }
+ break;
+ case LABEL_DECL:
+ e->mod[argnum-1] = 'l';
+ break;
+ case IDENTIFIER_NODE:
+ if (IDENTIFIER_LENGTH (arg) > 2
+ && IDENTIFIER_POINTER (arg)[0] == '%')
+ {
+ if (IDENTIFIER_POINTER (arg)[1] == 'e')
+ e->mod[argnum-1] = 'l';
+ else if (IDENTIFIER_POINTER (arg)[2] == 'h'
+ || IDENTIFIER_POINTER (arg)[2] == 'l')
+ e->mod[argnum-1] = 'b';
+ else if (IDENTIFIER_POINTER (arg)[2] == 'x')
+ e->mod[argnum-1] = 'w';
+ }
+ break;
+ default:
+ break;
+ }
+ args = TREE_CHAIN (args);
+ ++argnum;
+ }
+ --argnum;
+
+ args = iasm_x86_swap_operands (opcode, iargs);
+ if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1))
+ e->pseudo = true;
+
+ if (strcasecmp (opcode, "movs") == 0
+ || strcasecmp (opcode, "scas") == 0
+ || strcasecmp (opcode, "stos") == 0
+ || strcasecmp (opcode, "xlat") == 0)
+ args = NULL_TREE;
+ else if (strcasecmp (opcode, "cmovpo") == 0)
+ opcode = "cmovnp";
+ else if (strcasecmp (opcode, "cmovpe") == 0)
+ opcode = "cmovp";
+ else if (strcasecmp (opcode, "outs") == 0
+ && TREE_CHAIN (args))
+ {
+ e->mod[0] = e->mod[1];
+ }
+ else if (strcasecmp (opcode, "ins") == 0
+ && TREE_CHAIN (args))
+ {
+ e->mod[1] = 0;
+ }
+ /* movsx isn't part of the AT&T syntax; there it is spelled movs. */
+ else if (strcasecmp (opcode, "movsx") == 0)
+ opcode = "movs";
+ else if (strcasecmp (opcode, "pushfd") == 0)
+ *opcode_p = "pushf";
+ else if (strcasecmp (opcode, "popfd") == 0)
+ *opcode_p = "popf";
+
+ /* movzx isn't part of the AT&T syntax; there it is spelled movz. */
+ if (strcasecmp (opcode, "movzx") == 0)
+ {
+ /* Silly extension of the day: a zero-extended move that has the
+ same before and after size is accepted and is just a normal
+ move. */
+ if (argnum == 2
+ && (e->mod[0] == e->mod[1]
+ || e->mod[1] == 0))
+ opcode = "mov";
+ else
+ opcode = "movz";
+ }
+
+ if (strncasecmp (opcode, "f", 1) == 0 &&
+ (!(strcasecmp (opcode, "fldcw") == 0)))
+ {
+ if (e->mod[0] == 'w')
+ e->mod[0] = 's';
+ if (e->mod[1] == 'w')
+ e->mod[1] = 's';
+ }
+ else if (strcasecmp (opcode, "mov") == 0)
+ {
+ /* The 32-bit integer instructions can be used on floats. */
+ if (e->mod[0] == 's')
+ e->mod[0] = 'l';
+ if (e->mod[1] == 's')
+ e->mod[1] = 'l';
+ }
+
+ if (e->pseudo)
+ e->mod[0] = e->mod[1] = 0;
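+ /* For the opcodes below, the size letter recorded for the (first)
+ operand is dropped: branches, sets and other instructions whose
+ operand size is fixed or implicit. The group after it (lea and the
+ shift/rotate forms) instead drops the letter recorded for the second
+ operand. */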
+ else if (strcasecmp (opcode, "clflush") == 0
+ || strcasecmp (opcode, "fbld") == 0
+ || strcasecmp (opcode, "fbstp") == 0
+ || strcasecmp (opcode, "fldt") == 0
+ || strcasecmp (opcode, "fnstcw") == 0
+ || strcasecmp (opcode, "fnstsw") == 0
+ || strcasecmp (opcode, "fstcw") == 0
+ || strcasecmp (opcode, "fstsw") == 0
+ || strcasecmp (opcode, "fxrstor") == 0
+ || strcasecmp (opcode, "fxsave") == 0
+ || strcasecmp (opcode, "invlpg") == 0
+ || strcasecmp (opcode, "jmp") == 0
+ || strcasecmp (opcode, "call") == 0
+ || strcasecmp (opcode, "ja") == 0
+ || strcasecmp (opcode, "jae") == 0
+ || strcasecmp (opcode, "jb") == 0
+ || strcasecmp (opcode, "jbe") == 0
+ || strcasecmp (opcode, "jc") == 0
+ || strcasecmp (opcode, "je") == 0
+ || strcasecmp (opcode, "jg") == 0
+ || strcasecmp (opcode, "jge") == 0
+ || strcasecmp (opcode, "jl") == 0
+ || strcasecmp (opcode, "jle") == 0
+ || strcasecmp (opcode, "jna") == 0
+ || strcasecmp (opcode, "jnae") == 0
+ || strcasecmp (opcode, "jnb") == 0
+ || strcasecmp (opcode, "jnc") == 0
+ || strcasecmp (opcode, "jne") == 0
+ || strcasecmp (opcode, "jng") == 0
+ || strcasecmp (opcode, "jnge") == 0
+ || strcasecmp (opcode, "jnl") == 0
+ || strcasecmp (opcode, "jnle") == 0
+ || strcasecmp (opcode, "jno") == 0
+ || strcasecmp (opcode, "jnp") == 0
+ || strcasecmp (opcode, "jns") == 0
+ || strcasecmp (opcode, "jnz") == 0
+ || strcasecmp (opcode, "jo") == 0
+ || strcasecmp (opcode, "jp") == 0
+ || strcasecmp (opcode, "jpe") == 0
+ || strcasecmp (opcode, "jpo") == 0
+ || strcasecmp (opcode, "js") == 0
+ || strcasecmp (opcode, "jz") == 0
+ || strcasecmp (opcode, "ldmxcsr") == 0
+ || strcasecmp (opcode, "lgdt") == 0
+ || strcasecmp (opcode, "lidt") == 0
+ || strcasecmp (opcode, "lldt") == 0
+ || strcasecmp (opcode, "lmsw") == 0
+ || strcasecmp (opcode, "ltr") == 0
+ || strcasecmp (opcode, "movapd") == 0
+ || strcasecmp (opcode, "movaps") == 0
+ || strcasecmp (opcode, "movd") == 0
+ || strcasecmp (opcode, "movhpd") == 0
+ || strcasecmp (opcode, "movhps") == 0
+ || strcasecmp (opcode, "movlpd") == 0
+ || strcasecmp (opcode, "movlps") == 0
+ || strcasecmp (opcode, "movntdq") == 0
+ || strcasecmp (opcode, "movntpd") == 0
+ || strcasecmp (opcode, "movntps") == 0
+ || strcasecmp (opcode, "movntq") == 0
+ || strcasecmp (opcode, "movq") == 0
+ || strcasecmp (opcode, "movsd") == 0
+ || strcasecmp (opcode, "movss") == 0
+ || strcasecmp (opcode, "movupd") == 0
+ || strcasecmp (opcode, "movups") == 0
+ || strcasecmp (opcode, "out") == 0
+ || strcasecmp (opcode, "prefetchnta") == 0
+ || strcasecmp (opcode, "prefetcht0") == 0
+ || strcasecmp (opcode, "prefetcht1") == 0
+ || strcasecmp (opcode, "prefetcht2") == 0
+ || strcasecmp (opcode, "seta") == 0
+ || strcasecmp (opcode, "setae") == 0
+ || strcasecmp (opcode, "setb") == 0
+ || strcasecmp (opcode, "setbe") == 0
+ || strcasecmp (opcode, "setc") == 0
+ || strcasecmp (opcode, "sete") == 0
+ || strcasecmp (opcode, "setg") == 0
+ || strcasecmp (opcode, "setge") == 0
+ || strcasecmp (opcode, "setl") == 0
+ || strcasecmp (opcode, "setle") == 0
+ || strcasecmp (opcode, "setna") == 0
+ || strcasecmp (opcode, "setnae") == 0
+ || strcasecmp (opcode, "setnb") == 0
+ || strcasecmp (opcode, "setnbe") == 0
+ || strcasecmp (opcode, "setnc") == 0
+ || strcasecmp (opcode, "setne") == 0
+ || strcasecmp (opcode, "setng") == 0
+ || strcasecmp (opcode, "setnge") == 0
+ || strcasecmp (opcode, "setnl") == 0
+ || strcasecmp (opcode, "setnle") == 0
+ || strcasecmp (opcode, "setno") == 0
+ || strcasecmp (opcode, "setnp") == 0
+ || strcasecmp (opcode, "setns") == 0
+ || strcasecmp (opcode, "setnz") == 0
+ || strcasecmp (opcode, "seto") == 0
+ || strcasecmp (opcode, "setp") == 0
+ || strcasecmp (opcode, "setpe") == 0
+ || strcasecmp (opcode, "setpo") == 0
+ || strcasecmp (opcode, "sets") == 0
+ || strcasecmp (opcode, "setz") == 0
+ || strcasecmp (opcode, "sldt") == 0
+ || strcasecmp (opcode, "smsw") == 0
+ || strcasecmp (opcode, "stmxcsr") == 0
+ || strcasecmp (opcode, "str") == 0
+ || strcasecmp (opcode, "xlat") == 0)
+ e->mod[0] = 0;
+ else if (strcasecmp (opcode, "lea") == 0
+ || strcasecmp (opcode, "rcl") == 0
+ || strcasecmp (opcode, "rcr") == 0
+ || strcasecmp (opcode, "rol") == 0
+ || strcasecmp (opcode, "ror") == 0
+ || strcasecmp (opcode, "sal") == 0
+ || strcasecmp (opcode, "sar") == 0
+ || strcasecmp (opcode, "shl") == 0
+ || strcasecmp (opcode, "shr") == 0)
+ e->mod[1] = 0;
+
+ if ((argnum == 1 && e->mod[0])
+ || (argnum == 2 && e->mod[0]
+ && (e->mod[0] == e->mod[1]
+ || e->mod[1] == 0)))
+ {
+ sprintf (buf, "%s%c", opcode, e->mod[0]);
+ *opcode_p = buf;
+ }
+ else if (argnum == 2 && e->mod[0] && e->mod[1])
+ {
+ sprintf (buf, "%s%c%c", opcode, e->mod[1], e->mod[0]);
+ *opcode_p = buf;
+ }
+
+ return args;
+}
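+
+/* Worked example (illustration only): when both operand sizes are known
+   and differ, e.g. a byte-to-long zero extension written as "movzx",
+   the opcode is rewritten to "movz" above and both size suffixes are
+   appended, giving the AT&T mnemonic "movzbl"; when the sizes agree,
+   or only one is known, a single suffix is used ("movl", "addw", ...). */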
+
+/* Character used to separate the prefix words. */
+/* See radr://4141844 for the enhancement to make this uniformly ' '. */
+#define IASM_PREFIX_SEP '/'
+
+void
+iasm_x86_print_prefix (char *buf, tree prefix_list)
+{
+ buf += strlen (buf);
+ while (prefix_list)
+ {
+ tree prefix = TREE_VALUE (prefix_list);
+ size_t len = IDENTIFIER_LENGTH (prefix);
+ memcpy (buf, IDENTIFIER_POINTER (prefix), len);
+ buf += len;
+ buf[0] = IASM_PREFIX_SEP;
+ ++buf;
+ buf[0] = 0;
+ prefix_list = TREE_CHAIN (prefix_list);
+ }
+}
+
+/* Warn when a variable's address is used to form a memory address when
+   that address will use an extra register during reload. */
+
+static void
+iasm_warn_extra_reg (tree arg)
+{
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL
+ || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL))
+ warning (0, "addressing mode too complex, will consume an extra register");
+}
+
+bool
+iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses,
+ bool must_be_reg, bool must_not_be_reg, void *ep)
+{
+ iasm_md_extra_info *e = ep;
+ switch (TREE_CODE (arg))
+ {
+ case BRACKET_EXPR:
+ {
+ tree op1 = TREE_OPERAND (arg, 0);
+ tree op2 = TREE_OPERAND (arg, 1);
+ tree op0 = NULL_TREE, op3 = NULL_TREE;
+ tree scale = NULL_TREE;
+
+ if (op2 == NULL_TREE
+ && TREE_TYPE (op1)
+ && POINTER_TYPE_P (TREE_TYPE (op1)))
+ {
+ /* Let the normal operand printer output this without trying to
+ decompose it into parts so that things like (%esp + 20) + 4
+ can be output as 24(%esp) by the optimizer instead of 4(%0)
+ and burning an "R" with (%esp + 20). */
+ iasm_force_constraint ("m", e);
+ iasm_get_register_var (op1, "", buf, argnum, must_be_reg, e);
+ iasm_force_constraint (0, e);
+ break;
+ }
+
+ if (op2
+ && TREE_CODE (op2) == BRACKET_EXPR)
+ {
+ op3 = TREE_OPERAND (op2, 1);
+ op2 = TREE_OPERAND (op2, 0);
+ if (TREE_CODE (op2) == BRACKET_EXPR)
+ {
+ op0 = TREE_OPERAND (op2, 1);
+ op2 = TREE_OPERAND (op2, 0);
+ }
+ }
+ if (op0)
+ return false;
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "[");
+
+ if (op3 == NULL_TREE
+ && op2 && TREE_CODE (op2) == PLUS_EXPR)
+ {
+ op3 = TREE_OPERAND (op2, 0);
+ op2 = TREE_OPERAND (op2, 1);
+ }
+ if (op2 && TREE_CODE (op2) == MULT_EXPR)
+ {
+ tree t;
+ t = op3;
+ op3 = op2;
+ op2 = t;
+ }
+
+ /* Crack out the scaling, if any. */
+ if (ASSEMBLER_DIALECT == ASM_ATT
+ && op3
+ && TREE_CODE (op3) == MULT_EXPR)
+ {
+ if (TREE_CODE (TREE_OPERAND (op3, 1)) == INTEGER_CST)
+ {
+ scale = TREE_OPERAND (op3, 1);
+ op3 = TREE_OPERAND (op3, 0);
+ }
+ else if (TREE_CODE (TREE_OPERAND (op3, 0)) == INTEGER_CST)
+ {
+ scale = TREE_OPERAND (op3, 0);
+ op3 = TREE_OPERAND (op3, 1);
+ }
+ }
+
+ /* Complicated expression as JMP or CALL target. */
+ if (e->modifier && strcmp(e->modifier, "A") == 0)
+ {
+ strcat (buf, "*");
+ e->modifier = 0;
+ }
+ e->as_immediate = true;
+ iasm_print_operand (buf, op1, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ e->as_immediate = false;
+
+ /* Just an immediate. */
+ if (op2 == NULL_TREE && op3 == NULL_TREE)
+ break;
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "]");
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "[");
+ else
+ strcat (buf, "(");
+
+ if (op2)
+ {
+ /* We know by context, this has to be an R. */
+ iasm_force_constraint ("R", e);
+ iasm_warn_extra_reg (op2);
+ iasm_print_operand (buf, op2, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ iasm_force_constraint (0, e);
+ }
+ if (op3)
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "][");
+ else
+ strcat (buf, ",");
+
+ /* We know by context, this has to be an l. */
+ iasm_force_constraint ("l", e);
+ iasm_warn_extra_reg (op3);
+ iasm_print_operand (buf, op3, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ iasm_force_constraint (0, e);
+ if (scale)
+ {
+ strcat (buf, ",");
+ e->as_immediate = true;
+ iasm_print_operand (buf, scale, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ e->as_immediate = false;
+ }
+ }
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "]");
+ else
+ strcat (buf, ")");
+ }
+ break;
+
+ case ADDR_EXPR:
+ if ((TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF
+ || TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
+ && ! e->as_immediate)
+ {
+ iasm_get_register_var (arg, "", buf, argnum, must_be_reg, e);
+ break;
+ }
+ if (! e->as_immediate)
+ e->as_offset = true;
+ iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ e->as_offset = false;
+ break;
+
+ case MULT_EXPR:
+ iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ strcat (buf, "*");
+ iasm_print_operand (buf, TREE_OPERAND (arg, 1), argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
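+
+/* Worked example (illustration only; the operand string is hypothetical):
+   for a block-asm memory operand written in Intel/CW syntax such as
+
+       mov eax, [ebp + esi*4 + 8]
+
+   the BRACKET_EXPR above is decomposed into a displacement (8), a base
+   register (forced to the "R" constraint), an index register (forced to
+   the "l" constraint) and a scale (4), so the AT&T-dialect printer aims
+   to produce the "disp(base,index,scale)" form, here roughly
+   "movl 8(%ebp,%esi,4), %eax", while the Intel dialect keeps a bracketed
+   "[disp][base][index,scale]" layout. */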
+/* APPLE LOCAL end CW asm blocks */
+
+/* Return the mangling of TYPE if it is an extended fundamental type. */
+
+static const char *
+/* APPLE LOCAL mangle_type 7105099 */
+ix86_mangle_type (tree type)
+{
+ /* APPLE LOCAL begin mangle_type 7105099 */
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ /* APPLE LOCAL end mangle_type 7105099 */
+ switch (TYPE_MODE (type))
+ {
+ case TFmode:
+ /* __float128 is "g". */
+ return "g";
+ case XFmode:
+ /* "long double" or __float80 is "e". */
+ return "e";
+ default:
+ return NULL;
+ }
+}
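+
+/* Illustrative example (not exercised by this file): under the Itanium
+   C++ ABI these codes appear directly in mangled names, so
+
+       void f (long double);   // mangles as _Z1fe  ("e" for XFmode)
+       void f (__float128);    // mangles as _Z1fg  ("g" for TFmode)
+
+   while types whose mode is neither XFmode nor TFmode fall back to the
+   default mangling (the NULL return above). */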
+
+/* For 32-bit code we can save PIC register setup by using
+ __stack_chk_fail_local hidden function instead of calling
+ __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
+ register, so it is better to call __stack_chk_fail directly. */
+
+static tree
+ix86_stack_protect_fail (void)
+{
+ return TARGET_64BIT
+ ? default_external_stack_protect_fail ()
+ : default_hidden_stack_protect_fail ();
+}
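+
+/* For example (illustration only), a 32-bit PIC build routes stack
+   protector failures through the hidden __stack_chk_fail_local, so the
+   cold failure path needs no PIC register setup, whereas a 64-bit build
+   simply calls __stack_chk_fail. */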
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such, I
+ guess we'll have to see. */
+int
+asm_preferred_eh_data_format (int code, int global)
+{
+ if (flag_pic)
+ {
+ int type = DW_EH_PE_sdata8;
+ if (!TARGET_64BIT
+ || ix86_cmodel == CM_SMALL_PIC
+ || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
+ type = DW_EH_PE_sdata4;
+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+ }
+ if (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM && code))
+ return DW_EH_PE_udata4;
+ return DW_EH_PE_absptr;
+}
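+
+/* Worked example (illustration only): for 32-bit PIC code referencing a
+   global symbol (code == 0, global != 0), the function above selects
+   DW_EH_PE_sdata4 and returns
+
+       DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4
+
+   i.e. a 4-byte signed pc-relative reference through an indirection
+   slot, while non-PIC 32-bit code falls through to DW_EH_PE_absptr. */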
+
+#include "gt-i386.h"
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.h b/gcc-4.2.1-5666.3/gcc/config/i386/i386.h
new file mode 100644
index 000000000..df7703b36
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.h
@@ -0,0 +1,3230 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* The purpose of this file is to define the characteristics of the i386,
+ independent of assembler syntax or operating system.
+
+ Three other files build on this one to describe a specific assembler syntax:
+ bsd386.h, att386.h, and sun386.h.
+
+ The actual tm.h file for a particular system should include
+ this file, and then the file for the appropriate assembler syntax.
+
+ Many macros that specify assembler syntax are omitted entirely from
+ this file because they really belong in the files for particular
+ assemblers. These include RP, IP, LPREFIX, PUT_OP_SIZE, USE_STAR,
+ ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many
+ that start with ASM_ or end in ASM_OP. */
+
+/* Define the specific costs for a given cpu */
+
+struct processor_costs {
+ const int add; /* cost of an add instruction */
+ const int lea; /* cost of a lea instruction */
+ const int shift_var; /* variable shift costs */
+ const int shift_const; /* constant shift costs */
+ const int mult_init[5]; /* cost of starting a multiply
+ in QImode, HImode, SImode, DImode, TImode*/
+ const int mult_bit; /* cost of multiply per each bit set */
+ const int divide[5]; /* cost of a divide/mod
+ in QImode, HImode, SImode, DImode, TImode*/
+ int movsx; /* The cost of movsx operation. */
+ int movzx; /* The cost of movzx operation. */
+ const int large_insn; /* insns larger than this cost more */
+ const int move_ratio; /* The threshold of number of scalar
+ memory-to-memory move insns. */
+ const int movzbl_load; /* cost of loading using movzbl */
+ const int int_load[3]; /* cost of loading integer registers
+ in QImode, HImode and SImode relative
+ to reg-reg move (2). */
+ const int int_store[3]; /* cost of storing integer register
+ in QImode, HImode and SImode */
+ const int fp_move; /* cost of reg,reg fld/fst */
+ const int fp_load[3]; /* cost of loading FP register
+ in SFmode, DFmode and XFmode */
+ const int fp_store[3]; /* cost of storing FP register
+ in SFmode, DFmode and XFmode */
+ const int mmx_move; /* cost of moving MMX register. */
+ const int mmx_load[2]; /* cost of loading MMX register
+ in SImode and DImode */
+ const int mmx_store[2]; /* cost of storing MMX register
+ in SImode and DImode */
+ const int sse_move; /* cost of moving SSE register. */
+ const int sse_load[3]; /* cost of loading SSE register
+ in SImode, DImode and TImode*/
+ const int sse_store[3]; /* cost of storing SSE register
+ in SImode, DImode and TImode*/
+ const int mmxsse_to_integer; /* cost of moving mmxsse register to
+ integer and vice versa. */
+ const int prefetch_block; /* bytes moved to cache for prefetch. */
+ const int simultaneous_prefetches; /* number of parallel prefetch
+ operations. */
+ const int branch_cost; /* Default value for BRANCH_COST. */
+ const int fadd; /* cost of FADD and FSUB instructions. */
+ const int fmul; /* cost of FMUL instruction. */
+ const int fdiv; /* cost of FDIV instruction. */
+ const int fabs; /* cost of FABS instruction. */
+ const int fchs; /* cost of FCHS instruction. */
+ const int fsqrt; /* cost of FSQRT instruction. */
+};
+
+extern const struct processor_costs *ix86_cost;
+
+/* Macros used in the machine description to test the flags. */
+
+/* configure can arrange to make this 2, to force a 486. */
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_generic
+#endif
+
+#ifndef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT \
+ (TARGET_64BIT && TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+#endif
+
+#define TARGET_FLOAT_RETURNS_IN_80387 TARGET_FLOAT_RETURNS
+/* APPLE LOCAL begin AT&T-style stub 4164563 */
+#define MACHOPIC_NL_SYMBOL_PTR_SECTION ".section __IMPORT,__pointers,non_lazy_symbol_pointers"
+/* APPLE LOCAL end AT&T-style stub 4164563 */
+
+/* 64bit Sledgehammer mode. For libgcc2 we make sure this is a
+ compile-time constant. */
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#else
+#ifndef TARGET_BI_ARCH
+#undef TARGET_64BIT
+#if TARGET_64BIT_DEFAULT
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+#endif
+
+#define HAS_LONG_COND_BRANCH 1
+#define HAS_LONG_UNCOND_BRANCH 1
+
+#define TARGET_386 (ix86_tune == PROCESSOR_I386)
+#define TARGET_486 (ix86_tune == PROCESSOR_I486)
+#define TARGET_PENTIUM (ix86_tune == PROCESSOR_PENTIUM)
+#define TARGET_PENTIUMPRO (ix86_tune == PROCESSOR_PENTIUMPRO)
+#define TARGET_K6 (ix86_tune == PROCESSOR_K6)
+#define TARGET_ATHLON (ix86_tune == PROCESSOR_ATHLON)
+#define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4)
+#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
+#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
+#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
+/* APPLE LOCAL mainline */
+#define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2)
+#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
+#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
+#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
+
+#define TUNEMASK (1 << ix86_tune)
+extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
+extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
+extern const int x86_branch_hints, x86_unroll_strlen;
+extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
+extern const int x86_use_himode_fiop, x86_use_simode_fiop;
+extern const int x86_use_mov0, x86_use_cltd, x86_read_modify_write;
+extern const int x86_read_modify, x86_split_long_moves;
+extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix;
+extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
+extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
+extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
+extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
+extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
+extern const int x86_epilogue_using_move, x86_decompose_lea;
+extern const int x86_arch_always_fancy_math_387, x86_shift1;
+extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
+extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
+extern const int x86_use_ffreep;
+extern const int x86_inter_unit_moves, x86_schedule;
+extern const int x86_use_bt;
+/* APPLE LOCAL override options */
+extern int x86_cmpxchg, x86_cmpxchg8b, x86_cmpxchg16b, x86_xadd;
+extern const int x86_use_incdec;
+extern const int x86_pad_returns;
+/* APPLE LOCAL mainline bswap/local override options */
+extern int x86_bswap;
+extern const int x86_partial_flag_reg_stall;
+extern int x86_prefetch_sse;
+
+#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
+#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
+#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & TUNEMASK)
+#define TARGET_USE_BIT_TEST (x86_use_bit_test & TUNEMASK)
+#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & TUNEMASK)
+/* For sane SSE instruction set generation we need the fcomi instruction. It is
+ safe to enable all CMOVE instructions. */
+#define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE)
+#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
+#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & TUNEMASK)
+#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & TUNEMASK)
+#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & TUNEMASK)
+#define TARGET_USE_SAHF ((x86_use_sahf & TUNEMASK) && !TARGET_64BIT)
+#define TARGET_MOVX (x86_movx & TUNEMASK)
+#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & TUNEMASK)
+#define TARGET_PARTIAL_FLAG_REG_STALL (x86_partial_flag_reg_stall & TUNEMASK)
+#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & TUNEMASK)
+#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & TUNEMASK)
+#define TARGET_USE_MOV0 (x86_use_mov0 & TUNEMASK)
+#define TARGET_USE_CLTD (x86_use_cltd & TUNEMASK)
+#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & TUNEMASK)
+#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & TUNEMASK)
+#define TARGET_READ_MODIFY (x86_read_modify & TUNEMASK)
+#define TARGET_PROMOTE_QImode (x86_promote_QImode & TUNEMASK)
+#define TARGET_FAST_PREFIX (x86_fast_prefix & TUNEMASK)
+#define TARGET_SINGLE_STRINGOP (x86_single_stringop & TUNEMASK)
+#define TARGET_QIMODE_MATH (x86_qimode_math & TUNEMASK)
+#define TARGET_HIMODE_MATH (x86_himode_math & TUNEMASK)
+#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & TUNEMASK)
+#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & TUNEMASK)
+#define TARGET_ADD_ESP_4 (x86_add_esp_4 & TUNEMASK)
+#define TARGET_ADD_ESP_8 (x86_add_esp_8 & TUNEMASK)
+#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & TUNEMASK)
+#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & TUNEMASK)
+#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & TUNEMASK)
+#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
+#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ (x86_sse_partial_reg_dependency & TUNEMASK)
+#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
+#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
+#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
+#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & TUNEMASK)
+#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
+#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
+#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
+#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
+#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
+#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK)
+#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
+#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
+#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
+#define TARGET_USE_BT (x86_use_bt & TUNEMASK)
+#define TARGET_USE_INCDEC (x86_use_incdec & TUNEMASK)
+#define TARGET_PAD_RETURNS (x86_pad_returns & TUNEMASK)
+
+#define ASSEMBLER_DIALECT (ix86_asm_dialect)
+
+#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
+#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
+ && (ix86_fpmath & FPMATH_387))
+/* APPLE LOCAL mainline */
+#define TARGET_SSSE3 ((target_flags & MASK_SSSE3) != 0)
+
+#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
+#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
+
+#define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch))
+#define TARGET_CMPXCHG8B (x86_cmpxchg8b & (1 << ix86_arch))
+#define TARGET_CMPXCHG16B (x86_cmpxchg16b & (1 << ix86_arch))
+#define TARGET_XADD (x86_xadd & (1 << ix86_arch))
+/* APPLE LOCAL mainline bswap */
+#define TARGET_BSWAP (x86_bswap & (1 << ix86_arch))
+
+#ifndef TARGET_64BIT_DEFAULT
+#define TARGET_64BIT_DEFAULT 0
+#endif
+#ifndef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
+#endif
+
+/* Once GDB has been enhanced to deal with functions without frame
+ pointers, we can change this to allow for elimination of
+ the frame pointer in leaf functions. */
+#define TARGET_DEFAULT 0
+/* APPLE LOCAL begin mainline */
+/* Extra bits to force. */
+#define TARGET_SUBTARGET32_DEFAULT 0
+
+#define TARGET_SUBTARGET64_DEFAULT 0
+/* APPLE LOCAL end mainline */
+
+/* This is not really a target flag, but is done this way so that
+ it's analogous to similar code for Mach-O on PowerPC. darwin.h
+ redefines this to 1. */
+#define TARGET_MACHO 0
+/* APPLE LOCAL begin mach-o cleanup */
+#define MACHOPIC_INDIRECT 0
+#define MACHOPIC_PURE 0
+/* APPLE LOCAL end mach-o cleanup */
+
+/* Subtargets may reset this to 1 in order to enable 96-bit long double
+ with the rounding mode forced to 53 bits. */
+#define TARGET_96_ROUND_53_LONG_DOUBLE 0
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+#define OVERRIDE_OPTIONS override_options ()
+
+/* Define this to change the optimizations performed by default. */
+#define OPTIMIZATION_OPTIONS(LEVEL, SIZE) \
+ optimization_options ((LEVEL), (SIZE))
+
+/* -march=native handling only makes sense with compiler running on
+ an x86 or x86_64 chip. If changing this condition, also change
+ the condition in driver-i386.c. */
+#if defined(__i386__) || defined(__x86_64__)
+/* In driver-i386.c. */
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+#define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+#define HAVE_LOCAL_CPU_DETECT
+#endif
+
+/* Support for configure-time defaults of some command line options.
+ The order here is important so that -march doesn't squash the
+ tune or cpu values. */
+#define OPTION_DEFAULT_SPECS \
+ {"tune", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"arch", "%{!march=*:-march=%(VALUE)}"}
+
+/* Specs for the compiler proper */
+
+#ifndef CC1_CPU_SPEC
+#define CC1_CPU_SPEC_1 "\
+%{!mtune*: \
+%{m386:mtune=i386 \
+%n`-m386' is deprecated. Use `-march=i386' or `-mtune=i386' instead.\n} \
+%{m486:-mtune=i486 \
+%n`-m486' is deprecated. Use `-march=i486' or `-mtune=i486' instead.\n} \
+%{mpentium:-mtune=pentium \
+%n`-mpentium' is deprecated. Use `-march=pentium' or `-mtune=pentium' instead.\n} \
+%{mpentiumpro:-mtune=pentiumpro \
+%n`-mpentiumpro' is deprecated. Use `-march=pentiumpro' or `-mtune=pentiumpro' instead.\n} \
+%{mcpu=*:-mtune=%* \
+%n`-mcpu=' is deprecated. Use `-mtune=' or '-march=' instead.\n}} \
+%<mcpu=* \
+%{mintel-syntax:-masm=intel \
+%n`-mintel-syntax' is deprecated. Use `-masm=intel' instead.\n} \
+%{mno-intel-syntax:-masm=att \
+%n`-mno-intel-syntax' is deprecated. Use `-masm=att' instead.\n}"
+
+#ifndef HAVE_LOCAL_CPU_DETECT
+#define CC1_CPU_SPEC CC1_CPU_SPEC_1
+#else
+#define CC1_CPU_SPEC CC1_CPU_SPEC_1 \
+"%{march=native:%<march=native %:local_cpu_detect(arch) \
+ %{!mtune=*:%<mtune=native %:local_cpu_detect(tune)}} \
+%{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#endif
+#endif
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ size_t arch_len = strlen (ix86_arch_string); \
+ size_t tune_len = strlen (ix86_tune_string); \
+ int last_arch_char = ix86_arch_string[arch_len - 1]; \
+ int last_tune_char = ix86_tune_string[tune_len - 1]; \
+ \
+ if (TARGET_64BIT) \
+ { \
+ builtin_assert ("cpu=x86_64"); \
+ builtin_assert ("machine=x86_64"); \
+ builtin_define ("__amd64"); \
+ builtin_define ("__amd64__"); \
+ builtin_define ("__x86_64"); \
+ builtin_define ("__x86_64__"); \
+ } \
+ else \
+ { \
+ builtin_assert ("cpu=i386"); \
+ builtin_assert ("machine=i386"); \
+ builtin_define_std ("i386"); \
+ } \
+ \
+ /* Built-ins based on -mtune= (or -march= if no \
+ -mtune= given). */ \
+ if (TARGET_386) \
+ builtin_define ("__tune_i386__"); \
+ else if (TARGET_486) \
+ builtin_define ("__tune_i486__"); \
+ else if (TARGET_PENTIUM) \
+ { \
+ builtin_define ("__tune_i586__"); \
+ builtin_define ("__tune_pentium__"); \
+ if (last_tune_char == 'x') \
+ builtin_define ("__tune_pentium_mmx__"); \
+ } \
+ else if (TARGET_PENTIUMPRO) \
+ { \
+ builtin_define ("__tune_i686__"); \
+ builtin_define ("__tune_pentiumpro__"); \
+ switch (last_tune_char) \
+ { \
+ case '3': \
+ builtin_define ("__tune_pentium3__"); \
+ /* FALLTHRU */ \
+ case '2': \
+ builtin_define ("__tune_pentium2__"); \
+ break; \
+ } \
+ } \
+ else if (TARGET_K6) \
+ { \
+ builtin_define ("__tune_k6__"); \
+ if (last_tune_char == '2') \
+ builtin_define ("__tune_k6_2__"); \
+ else if (last_tune_char == '3') \
+ builtin_define ("__tune_k6_3__"); \
+ } \
+ else if (TARGET_ATHLON) \
+ { \
+ builtin_define ("__tune_athlon__"); \
+ /* Only plain "athlon" lacks SSE. */ \
+ if (last_tune_char != 'n') \
+ builtin_define ("__tune_athlon_sse__"); \
+ } \
+ else if (TARGET_K8) \
+ builtin_define ("__tune_k8__"); \
+ else if (TARGET_PENTIUM4) \
+ builtin_define ("__tune_pentium4__"); \
+ else if (TARGET_NOCONA) \
+ builtin_define ("__tune_nocona__"); \
+ /* APPLE LOCAL begin mainline */ \
+ else if (TARGET_CORE2) \
+ builtin_define ("__tune_core2__"); \
+ /* APPLE LOCAL end mainline */ \
+ \
+ if (TARGET_MMX) \
+ builtin_define ("__MMX__"); \
+ if (TARGET_3DNOW) \
+ builtin_define ("__3dNOW__"); \
+ if (TARGET_3DNOW_A) \
+ builtin_define ("__3dNOW_A__"); \
+ if (TARGET_SSE) \
+ builtin_define ("__SSE__"); \
+ if (TARGET_SSE2) \
+ builtin_define ("__SSE2__"); \
+ if (TARGET_SSE3) \
+ builtin_define ("__SSE3__"); \
+ /* APPLE LOCAL begin mainline */ \
+ if (TARGET_SSSE3) \
+ builtin_define ("__SSSE3__"); \
+ /* APPLE LOCAL end mainline */ \
+ /* APPLE LOCAL begin 5612787 mainline sse4 */ \
+ if (TARGET_SSE4_1) \
+ builtin_define ("__SSE4_1__"); \
+ if (TARGET_SSE4_2) \
+ builtin_define ("__SSE4_2__"); \
+ if (TARGET_SSE4A) \
+ builtin_define ("__SSE4A__"); \
+ /* APPLE LOCAL end 5612787 mainline sse4 */ \
+ if (TARGET_SSE_MATH && TARGET_SSE) \
+ builtin_define ("__SSE_MATH__"); \
+ if (TARGET_SSE_MATH && TARGET_SSE2) \
+ builtin_define ("__SSE2_MATH__"); \
+ \
+ /* Built-ins based on -march=. */ \
+ if (ix86_arch == PROCESSOR_I486) \
+ { \
+ builtin_define ("__i486"); \
+ builtin_define ("__i486__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_PENTIUM) \
+ { \
+ builtin_define ("__i586"); \
+ builtin_define ("__i586__"); \
+ builtin_define ("__pentium"); \
+ builtin_define ("__pentium__"); \
+ if (last_arch_char == 'x') \
+ builtin_define ("__pentium_mmx__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_PENTIUMPRO) \
+ { \
+ builtin_define ("__i686"); \
+ builtin_define ("__i686__"); \
+ builtin_define ("__pentiumpro"); \
+ builtin_define ("__pentiumpro__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_K6) \
+ { \
+ \
+ builtin_define ("__k6"); \
+ builtin_define ("__k6__"); \
+ if (last_arch_char == '2') \
+ builtin_define ("__k6_2__"); \
+ else if (last_arch_char == '3') \
+ builtin_define ("__k6_3__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_ATHLON) \
+ { \
+ builtin_define ("__athlon"); \
+ builtin_define ("__athlon__"); \
+ /* Only plain "athlon" lacks SSE. */ \
+ if (last_arch_char != 'n') \
+ builtin_define ("__athlon_sse__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_K8) \
+ { \
+ builtin_define ("__k8"); \
+ builtin_define ("__k8__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_PENTIUM4) \
+ { \
+ builtin_define ("__pentium4"); \
+ builtin_define ("__pentium4__"); \
+ } \
+ else if (ix86_arch == PROCESSOR_NOCONA) \
+ { \
+ builtin_define ("__nocona"); \
+ builtin_define ("__nocona__"); \
+ } \
+ /* APPLE LOCAL begin mainline */ \
+ else if (ix86_arch == PROCESSOR_CORE2) \
+ { \
+ builtin_define ("__core2"); \
+ builtin_define ("__core2__"); \
+ } \
+ /* APPLE LOCAL end mainline */ \
+ } \
+ while (0)
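+
+/* Example of the resulting predefines (illustrative; the exact set
+   depends on how the -m options are mapped onto target_flags elsewhere):
+   a compile along the lines of
+
+       gcc -m64 -march=core2 foo.c
+
+   would be expected to assert cpu=x86_64 and machine=x86_64 and to
+   define __x86_64__, __core2__ / __tune_core2__, plus the ISA macros
+   enabled for that arch (__MMX__, __SSE__, __SSE2__, __SSE3__,
+   __SSSE3__). */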
+
+#define TARGET_CPU_DEFAULT_i386 0
+#define TARGET_CPU_DEFAULT_i486 1
+#define TARGET_CPU_DEFAULT_pentium 2
+#define TARGET_CPU_DEFAULT_pentium_mmx 3
+#define TARGET_CPU_DEFAULT_pentiumpro 4
+#define TARGET_CPU_DEFAULT_pentium2 5
+#define TARGET_CPU_DEFAULT_pentium3 6
+#define TARGET_CPU_DEFAULT_pentium4 7
+#define TARGET_CPU_DEFAULT_k6 8
+#define TARGET_CPU_DEFAULT_k6_2 9
+#define TARGET_CPU_DEFAULT_k6_3 10
+#define TARGET_CPU_DEFAULT_athlon 11
+#define TARGET_CPU_DEFAULT_athlon_sse 12
+#define TARGET_CPU_DEFAULT_k8 13
+#define TARGET_CPU_DEFAULT_pentium_m 14
+#define TARGET_CPU_DEFAULT_prescott 15
+#define TARGET_CPU_DEFAULT_nocona 16
+#define TARGET_CPU_DEFAULT_generic 17
+/* APPLE LOCAL mainline */
+#define TARGET_CPU_DEFAULT_core2 18
+/* APPLE LOCAL begin mainline */
+#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
+ "pentiumpro", "pentium2", "pentium3", \
+ "pentium4", "k6", "k6-2", "k6-3",\
+ "athlon", "athlon-4", "k8", \
+ "pentium-m", "prescott", "nocona", \
+ "generic", "core2" }
+/* APPLE LOCAL end mainline */
+
+#ifndef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) "
+#endif
+
+/* This macro defines names of additional specifications to put in the
+ specs that can be used in various specifications like CC1_SPEC. Its
+ definition is an initializer with a subgrouping for each command option.
+
+ Each subgrouping contains a string constant, that defines the
+ specification name, and a string constant that used by the GCC driver
+ program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define EXTRA_SPECS \
+ { "cc1_cpu", CC1_CPU_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+/* target machine storage layout */
+
+#define LONG_DOUBLE_TYPE_SIZE 80
+
+/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
+ FPU, assume that the fpcw is set to extended precision; when using
+ only SSE, rounding is correct; when using both SSE and the FPU,
+ the rounding precision is indeterminate, since either may be chosen
+ apparently at random. */
+#define TARGET_FLT_EVAL_METHOD \
+ (TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 0 : 2)
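+
+/* Illustration of the mapping above (not itself used by the compiler):
+   with SSE math only, FLT_EVAL_METHOD is 0 and float/double arithmetic
+   is evaluated in its own precision; with the 387 only it is 2, so
+   intermediate results are kept in long double precision; mixing both
+   yields -1, i.e. indeterminate intermediate precision. */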
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define FLOAT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE BITS_PER_WORD
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_LONG_TYPE_SIZE 64
+
+#if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT
+#define MAX_BITS_PER_WORD 64
+#else
+#define MAX_BITS_PER_WORD 32
+#endif
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is true on the 80386. */
+
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is not true on the 80386. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+/* Not true for 80386 */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#ifdef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#else
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+/* APPLE LOCAL begin compiler should obey -mpreferred-stack-boundary (radar 3232990) */
+/* prefer * #define STACK_BOUNDARY ((ix86_preferred_stack_boundary > 128) ? 128 : ix86_preferred_stack_boundary) */
+/* We're going to extremes to yield a result of indeterminate
+ signedness here; this macro will be expanded in signed and
+ unsigned contexts, and mixed signedness induces fatal
+ warnings. Radar 3941684. */
+#define STACK_BOUNDARY ((ix86_preferred_stack_boundary >= 128) ? 128 : \
+ (ix86_preferred_stack_boundary == 64) ? 64 : 32)
+/* APPLE LOCAL end compiler should obey -mpreferred-stack-boundary (radar 3232990) */
+
+/* Boundary (in *bits*) on which the stack pointer prefers to be
+ aligned; the compiler cannot rely on having this alignment. */
+#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
+
+/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
+#define SAVE_PREFERRED_STACK_BOUNDARY ix86_save_preferred_stack_boundary
+/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
+
+/* As of July 2001, many runtimes do not align the stack properly when
+ entering main. This causes expand_main_function to forcibly align
+ the stack, which results in aligned frames for functions called from
+ main, though it does nothing for the alignment of main itself. */
+#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
+ (ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
+
+/* Minimum allocation boundary for the code of a function. */
+#define FUNCTION_BOUNDARY 8
+
+/* C++ stores the virtual bit in the lowest bit of function pointers. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_pfn
+
+/* Alignment of field after `int : 0' in a structure. */
+
+#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD
+
+/* Minimum size in bits of the largest boundary to which any
+ and all fundamental data types supported by the hardware
+ might need to be aligned. No data type wants to be aligned
+ rounder than this.
+
+ Pentium+ prefers DFmode values to be aligned to 64 bit boundary
+ and Pentium Pro XFmode values at 128 bit boundaries. */
+
+#define BIGGEST_ALIGNMENT 128
+
+/* Decide whether a variable of mode MODE should be 128 bit aligned. */
+#define ALIGN_MODE_128(MODE) \
+ ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
+
+/* The published ABIs say that doubles should be aligned on word
+ boundaries, so lower the alignment for structure fields unless
+ -malign-double is set. */
+
+/* ??? Blah -- this macro is used directly by libobjc. Since it
+ supports no vector modes, cut out the complexity and fall back
+ on BIGGEST_FIELD_ALIGNMENT. */
+#ifdef IN_TARGET_LIBS
+#ifdef __x86_64__
+#define BIGGEST_FIELD_ALIGNMENT 128
+#else
+#define BIGGEST_FIELD_ALIGNMENT 32
+#endif
+#else
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ x86_field_alignment (FIELD, COMPUTED)
+#endif
+
+/* If defined, a C expression to compute the alignment given to a
+ constant that is being placed in memory. EXP is the constant
+ and ALIGN is the alignment that the object would ordinarily have.
+ The value of this macro is used instead of that alignment to align
+ the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that `strcpy' calls that copy
+ constants can be done inline. */
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) ix86_constant_alignment ((EXP), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a static
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that `strcpy' calls
+ that copy constants to character arrays can be done inline. */
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a local
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) ix86_local_alignment ((TYPE), (ALIGN))
+
+/* If defined, a C expression that gives the alignment boundary, in
+ bits, of an argument with the specified mode and type. If it is
+ not defined, `PARM_BOUNDARY' is used for all arguments. */
+
+#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \
+ ix86_function_arg_boundary ((MODE), (TYPE))
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 0
+
+/* If bit field type is int, don't let it cross an int,
+ and give entire struct the alignment of an int. */
+/* Required on the 386 since it doesn't have bit-field insns. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Standard register usage. */
+
+/* This processor has special stack-like registers. See reg-stack.c
+ for details. */
+
+#define STACK_REGS
+#define IS_STACK_MODE(MODE) \
+ (((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH)) \
+ || ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \
+ || (MODE) == XFmode)
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ In the 80386 we give the 8 general purpose registers the numbers 0-7.
+ We number the floating point registers 8-15.
+ Note that registers 0-7 can be accessed as a short or int,
+ while only 0-3 may be used with byte `mov' instructions.
+
+ Reg 16 does not correspond to any hardware register, but instead
+ appears in the RTL as an argument pointer prior to reload, and is
+ eliminated during reloading in favor of either the stack or frame
+ pointer. */
+
+#define FIRST_PSEUDO_REGISTER 53
+
+/* Number of hardware registers that go into the DWARF-2 unwind info.
+ If not defined, equals FIRST_PSEUDO_REGISTER. */
+
+#define DWARF_FRAME_REGISTERS 17
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+ On the 80386, the stack pointer is such, as is the arg pointer.
+
+ The value is zero if the register is not fixed on either 32 or
+   64 bit targets, one if the register is fixed on both 32 and 64
+   bit targets, two if it is only fixed on 32bit targets and three
+   if it is only fixed on 64bit targets.
+ Proper values are computed in the CONDITIONAL_REGISTER_USAGE.
+ */
+#define FIXED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
+/*arg,flags,fpsr,dir,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2}
+
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like.
+
+ The value is zero if the register is not call used on either 32 or
+   64 bit targets, one if the register is call used on both 32 and 64
+   bit targets, two if it is only call used on 32bit targets and three
+   if it is only call used on 64bit targets.
+ Proper values are computed in the CONDITIONAL_REGISTER_USAGE.
+*/
+#define CALL_USED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 1, 1, 1, 0, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*arg,flags,fpsr,dir,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 1, 1, 1, 1, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1} \
+
+/* Order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. List frame pointer
+ late and fixed registers last. Note that, in general, we prefer
+ registers listed in CALL_USED_REGISTERS, keeping the others
+ available for storage of persistent values.
+
+   The ORDER_REGS_FOR_LOCAL_ALLOC actually overwrites the order,
+   so this is just an empty initializer for the array. */
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52 }
+
+/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
+ to be rearranged based on a particular function. When using sse math,
+   we want to allocate SSE before x87 registers and vice versa. */
+
+#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
+
+
+/* Macro to conditionally modify fixed_regs/call_used_regs. */
+#define CONDITIONAL_REGISTER_USAGE \
+do { \
+ int i; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ { \
+ if (fixed_regs[i] > 1) \
+ fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2)); \
+ if (call_used_regs[i] > 1) \
+ call_used_regs[i] = (call_used_regs[i] \
+ == (TARGET_64BIT ? 3 : 2)); \
+ } \
+ if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) \
+ { \
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
+ } \
+ if (! TARGET_MMX) \
+ { \
+ int i; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ } \
+ if (! TARGET_SSE) \
+ { \
+ int i; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ } \
+ if (! TARGET_80387 && ! TARGET_FLOAT_RETURNS_IN_80387) \
+ { \
+ int i; \
+ HARD_REG_SET x; \
+ COPY_HARD_REG_SET (x, reg_class_contents[(int)FLOAT_REGS]); \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (x, i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ } \
+ if (! TARGET_64BIT) \
+ { \
+ int i; \
+ for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) \
+ reg_names[i] = ""; \
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) \
+ reg_names[i] = ""; \
+ } \
+ } while (0)
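+
+/* Worked example of the 0/1/2/3 encoding resolved above (illustration
+   only): r8-r15 and xmm8-xmm15 carry the value 2 in FIXED_REGISTERS, so
+   on a 32-bit target the loop rewrites that to 1 (the registers do not
+   exist there) and the !TARGET_64BIT block also clears their names,
+   while on a 64-bit target the 2 becomes 0 and they are available to
+   the register allocator. */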
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ Actually there are no two word move instructions for consecutive
+ registers. And only registers 0-3 may have mov byte instructions
+ applied to them.
+ */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
+ : ((MODE) == XFmode \
+ ? (TARGET_64BIT ? 2 : 3) \
+ : (MODE) == XCmode \
+ ? (TARGET_64BIT ? 4 : 6) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
+
+#define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE) \
+ ((TARGET_128BIT_LONG_DOUBLE && !TARGET_64BIT) \
+ ? (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? 0 \
+ : ((MODE) == XFmode || (MODE) == XCmode)) \
+ : 0)
+
+#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
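+
+/* Worked examples for HARD_REGNO_NREGS above (illustration only):
+   DImode (8 bytes) in a general register on a 32-bit target needs
+   (8 + 4 - 1) / 4 = 2 hard registers; XFmode in general registers needs
+   3 in 32-bit mode and 2 in 64-bit mode; any mode held in an FP, SSE or
+   MMX register occupies a single register (two for a complex mode). */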
+
+#define VALID_SSE2_REG_MODE(MODE) \
+ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
+ || (MODE) == V2DImode || (MODE) == DFmode)
+
+#define VALID_SSE_REG_MODE(MODE) \
+ ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \
+ || (MODE) == SFmode || (MODE) == TFmode)
+
+#define VALID_MMX_REG_MODE_3DNOW(MODE) \
+ ((MODE) == V2SFmode || (MODE) == SFmode)
+
+#define VALID_MMX_REG_MODE(MODE) \
+ ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
+/* APPLE LOCAL 4656532 use V1DImode for _m64 */ \
+ || (MODE) == V2SImode || (MODE) == SImode || (MODE) == V1DImode)
+
+/* ??? No autovectorization into MMX or 3DNOW until we can reliably
+ place emms and femms instructions. */
+#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
+
+#define VALID_FP_MODE_P(MODE) \
+ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
+ || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \
+
+#define VALID_INT_MODE_P(MODE) \
+ ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode \
+ || (MODE) == DImode \
+ || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode \
+ || (MODE) == CDImode \
+ || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode \
+ || (MODE) == TFmode || (MODE) == TCmode)))
+
+/* Return true for modes passed in SSE registers. */
+#define SSE_REG_MODE_P(MODE) \
+ ((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \
+ || (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \
+ || (MODE) == V4SFmode || (MODE) == V4SImode)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ix86_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+
+#define MODES_TIEABLE_P(MODE1, MODE2) ix86_modes_tieable_p (MODE1, MODE2)
+
+/* It is possible to write patterns to move flags; but until someone
+   does it, avoid copying CCmode values.  */
+#define AVOID_CCMODE_COPIES
+
+/* Specify the modes required to caller save a given hard regno.
+ We do this on i386 to prevent flags from being saved at all.
+
+ Kill any attempts to combine saving of modes. */
+
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (CC_REGNO_P (REGNO) ? VOIDmode \
+ : (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode \
+ : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false)\
+ : (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode \
+ : (MODE) == QImode && (REGNO) >= 4 && !TARGET_64BIT ? SImode \
+ : (MODE))
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* On the 386 the pc register is %eip, and is not usable as a general
+   register.  The ordinary mov instructions won't work on it. */
+/* #define PC_REGNUM */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 7
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 6
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 20
+
+/* First floating point reg */
+#define FIRST_FLOAT_REG 8
+
+/* First & last stack-like regs */
+#define FIRST_STACK_REG FIRST_FLOAT_REG
+#define LAST_STACK_REG (FIRST_FLOAT_REG + 7)
+
+#define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1)
+#define LAST_SSE_REG (FIRST_SSE_REG + 7)
+
+#define FIRST_MMX_REG (LAST_SSE_REG + 1)
+#define LAST_MMX_REG (FIRST_MMX_REG + 7)
+
+#define FIRST_REX_INT_REG (LAST_MMX_REG + 1)
+#define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7)
+
+#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1)
+#define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7)
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms
+ may be accessed via the stack pointer) in functions that seem suitable.
+ This is computed in `reload', in reload1.c. */
+#define FRAME_POINTER_REQUIRED ix86_frame_pointer_required ()
+
+/* Override this in other tm.h files to cope with various OS lossage
+ requiring a frame pointer. */
+#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+#endif
+
+/* Make sure we can access arbitrary call frames. */
+#define SETUP_FRAME_ADDRESSES() ix86_setup_frame_addresses ()
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 16
+
+/* Register in which static-chain is passed to a function.
+ We do use ECX as static chain register for 32 bit ABI. On the
+ 64bit ABI, ECX is an argument register, so we use R10 instead. */
+#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? FIRST_REX_INT_REG + 10 - 8 : 2)
+
+/* Register to hold the addressing base for position independent
+ code access to data items. We don't use PIC pointer for 64bit
+ mode. Define the regnum to dummy value to prevent gcc from
+ pessimizing code dealing with EBX.
+
+ To avoid clobbering a call-saved register unnecessarily, we renumber
+ the pic register when possible. The change is visible after the
+ prologue has been emitted. */
+
+#define REAL_PIC_OFFSET_TABLE_REGNUM 3
+
+#define PIC_OFFSET_TABLE_REGNUM \
+ /* APPLE LOCAL begin 5695218 */ \
+ ((TARGET_64BIT && ix86_cmodel == CM_SMALL_PIC) \
+ || !flag_pic ? INVALID_REGNUM \
+ : reload_completed && pic_offset_table_rtx ? REGNO (pic_offset_table_rtx) \
+ : REAL_PIC_OFFSET_TABLE_REGNUM) \
+ /* APPLE LOCAL end 5695218 */
+
+#define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_"
+
+/* A C expression which can inhibit the returning of certain function
+ values in registers, based on the type of value. A nonzero value
+ says to return the function value in memory, just as large
+ structures are always returned. Here TYPE will be a C expression
+ of type `tree', representing the data type of the value.
+
+ Note that values of mode `BLKmode' must be explicitly handled by
+ this macro. Also, the option `-fpcc-struct-return' takes effect
+ regardless of this macro. On most systems, it is possible to
+ leave the macro undefined; this causes a default definition to be
+ used, whose value is the constant 1 for `BLKmode' values, and 0
+ otherwise.
+
+ Do not use this macro to indicate that structures and unions
+ should always be returned in memory. You should instead use
+ `DEFAULT_PCC_STRUCT_RETURN' to indicate this. */
+
+#define RETURN_IN_MEMORY(TYPE) \
+ ix86_return_in_memory (TYPE)
+
+/* APPLE LOCAL begin radar 4781080 */
+#define OBJC_FPRETURN_MSGCALL(TYPE,WHICH) \
+ ix86_objc_fpreturn_msgcall (TYPE, WHICH)
+/* APPLE LOCAL end radar 4781080 */
+
+/* This is overridden by <cygwin.h>. */
+#define MS_AGGREGATE_RETURN 0
+
+/* This is overridden by <netware.h>. */
+#define KEEP_AGGREGATE_RETURN_POINTER 0
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union.
+
+ It might seem that class BREG is unnecessary, since no useful 386
+ opcode needs reg %ebx. But some systems pass args to the OS in ebx,
+ and the "b" register constraint is useful in asms for syscalls.
+
+ The flags and fpsr registers are in no class. */
+
+enum reg_class
+{
+ NO_REGS,
+ AREG, DREG, CREG, BREG, SIREG, DIREG,
+ AD_REGS, /* %eax/%edx for DImode */
+ Q_REGS, /* %eax %ebx %ecx %edx */
+ NON_Q_REGS, /* %esi %edi %ebp %esp */
+ INDEX_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp */
+ LEGACY_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp */
+ GENERAL_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp %r8 - %r15*/
+ FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */
+ FLOAT_REGS,
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ SSE_FIRST_REG,
+ SSE_REGS,
+ MMX_REGS,
+ FP_TOP_SSE_REGS,
+ FP_SECOND_SSE_REGS,
+ FLOAT_SSE_REGS,
+ FLOAT_INT_REGS,
+ INT_SSE_REGS,
+ FLOAT_INT_SSE_REGS,
+ ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define INTEGER_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), GENERAL_REGS)
+#define FLOAT_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), FLOAT_REGS)
+#define SSE_CLASS_P(CLASS) \
+ ((CLASS) == SSE_REGS)
+#define MMX_CLASS_P(CLASS) \
+ ((CLASS) == MMX_REGS)
+#define MAYBE_INTEGER_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), GENERAL_REGS)
+#define MAYBE_FLOAT_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), FLOAT_REGS)
+#define MAYBE_SSE_CLASS_P(CLASS) \
+ reg_classes_intersect_p (SSE_REGS, (CLASS))
+#define MAYBE_MMX_CLASS_P(CLASS) \
+ reg_classes_intersect_p (MMX_REGS, (CLASS))
+
+#define Q_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), Q_REGS)
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "AREG", "DREG", "CREG", "BREG", \
+ "SIREG", "DIREG", \
+ "AD_REGS", \
+ "Q_REGS", "NON_Q_REGS", \
+ "INDEX_REGS", \
+ "LEGACY_REGS", \
+ "GENERAL_REGS", \
+ "FP_TOP_REG", "FP_SECOND_REG", \
+ "FLOAT_REGS", \
+ /* APPLE LOCAL 5612787 mainline sse4 */ \
+ "SSE_FIRST_REG", \
+ "SSE_REGS", \
+ "MMX_REGS", \
+ "FP_TOP_SSE_REGS", \
+ "FP_SECOND_SSE_REGS", \
+ "FLOAT_SSE_REGS", \
+ "FLOAT_INT_REGS", \
+ "INT_SSE_REGS", \
+ "FLOAT_INT_SSE_REGS", \
+ "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ { 0x00, 0x0 }, \
+ { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \
+ { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \
+ { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \
+ { 0x03, 0x0 }, /* AD_REGS */ \
+ { 0x0f, 0x0 }, /* Q_REGS */ \
+ { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \
+ { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \
+ { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \
+ { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \
+ { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
+ { 0xff00, 0x0 }, /* FLOAT_REGS */ \
+/* APPLE LOCAL 5612787 mainline sse4 */ \
+ { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \
+{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \
+{ 0xe0000000, 0x1f }, /* MMX_REGS */ \
+{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REGS */ \
+{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REGS */ \
+{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \
+ { 0x1ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \
+{ 0x1fe100ff,0x1fffe0 }, /* INT_SSE_REGS */ \
+{ 0x1fe1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \
+{ 0xffffffff,0x1fffff } \
+}
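+
+/* Worked example of reading the table above: each initializer is a two-word
+   HARD_REG_SET bitmap indexed by hard register number, so AD_REGS =
+   { 0x03, 0x0 } sets bits 0 and 1, i.e. %eax and %edx, matching the
+   AD_REGS comment in the reg_class enum.  */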
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO])
+
+/* When defined, the compiler allows registers explicitly used in the
+ rtl to be used as spill registers but prevents the compiler from
+ extending the lifetime of these registers. */
+
+#define SMALL_REGISTER_CLASSES 1
+
+#define QI_REG_P(X) \
+ (REG_P (X) && REGNO (X) < 4)
+
+#define GENERAL_REGNO_P(N) \
+ ((N) < 8 || REX_INT_REGNO_P (N))
+
+#define GENERAL_REG_P(X) \
+ (REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
+
+#define ANY_QI_REG_P(X) (TARGET_64BIT ? GENERAL_REG_P(X) : QI_REG_P (X))
+
+#define NON_QI_REG_P(X) \
+ (REG_P (X) && REGNO (X) >= 4 && REGNO (X) < FIRST_PSEUDO_REGISTER)
+
+#define REX_INT_REGNO_P(N) ((N) >= FIRST_REX_INT_REG && (N) <= LAST_REX_INT_REG)
+#define REX_INT_REG_P(X) (REG_P (X) && REX_INT_REGNO_P (REGNO (X)))
+
+#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
+#define FP_REGNO_P(N) ((N) >= FIRST_STACK_REG && (N) <= LAST_STACK_REG)
+#define ANY_FP_REG_P(X) (REG_P (X) && ANY_FP_REGNO_P (REGNO (X)))
+#define ANY_FP_REGNO_P(N) (FP_REGNO_P (N) || SSE_REGNO_P (N))
+
+#define SSE_REGNO_P(N) \
+ (((N) >= FIRST_SSE_REG && (N) <= LAST_SSE_REG) \
+ || ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG))
+
+#define REX_SSE_REGNO_P(N) \
+ ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG)
+
+#define SSE_REGNO(N) \
+ ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8)
+#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
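+
+/* For example, SSE_REGNO (3) is FIRST_SSE_REG + 3 (%xmm3), while
+   SSE_REGNO (9) is FIRST_REX_SSE_REG + 1 (%xmm9); two ranges are needed
+   because %xmm8-%xmm15 are not numbered contiguously with %xmm0-%xmm7
+   in REGISTER_NAMES below.  */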
+
+#define SSE_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
+
+#define MMX_REGNO_P(N) ((N) >= FIRST_MMX_REG && (N) <= LAST_MMX_REG)
+#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
+
+#define STACK_REG_P(XOP) \
+ (REG_P (XOP) && \
+ REGNO (XOP) >= FIRST_STACK_REG && \
+ REGNO (XOP) <= LAST_STACK_REG)
+
+#define NON_STACK_REG_P(XOP) (REG_P (XOP) && ! STACK_REG_P (XOP))
+
+#define STACK_TOP_P(XOP) (REG_P (XOP) && REGNO (XOP) == FIRST_STACK_REG)
+
+#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
+#define CC_REGNO_P(X) ((X) == FLAGS_REG || (X) == FPSR_REG)
+
+/* The class value for index registers, and the one for base regs. */
+
+#define INDEX_REG_CLASS INDEX_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Place additional restrictions on the register class to use when it
+ is necessary to be able to hold a value of mode MODE in a reload
+ register for which class CLASS would ordinarily be used. */
+
+#define LIMIT_RELOAD_CLASS(MODE, CLASS) \
+ ((MODE) == QImode && !TARGET_64BIT \
+ && ((CLASS) == ALL_REGS || (CLASS) == GENERAL_REGS \
+ || (CLASS) == LEGACY_REGS || (CLASS) == INDEX_REGS) \
+ ? Q_REGS : (CLASS))
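+
+/* For example, reloading a QImode value whose class is GENERAL_REGS on a
+   32-bit target is narrowed to Q_REGS here, since %esi, %edi, %ebp and
+   %esp have no 8-bit subregisters outside 64-bit mode.  */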
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+ On the 80386 series, we prevent floating constants from being
+ reloaded into floating registers (since no move-insn can do that)
+ and we ensure that QImodes aren't reloaded into the esi or edi reg. */
+
+/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
+ QImode must go into class Q_REGS.
+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
+ movdf to do mem-to-mem moves through integer regs. */
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ ix86_preferred_reload_class ((X), (CLASS))
+
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+
+#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \
+ ix86_preferred_output_reload_class ((X), (CLASS))
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers. */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ ix86_secondary_memory_needed ((CLASS1), (CLASS2), (MODE), 1)
+
+/* QImode spills from non-QI registers need a scratch. This does not
+ happen often -- the only example so far requires an uninitialized
+ pseudo. */
+
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, OUT) \
+ (((CLASS) == GENERAL_REGS || (CLASS) == LEGACY_REGS \
+ || (CLASS) == INDEX_REGS) && !TARGET_64BIT && (MODE) == QImode \
+ ? Q_REGS : NO_REGS)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+/* On the 80386, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (!MAYBE_INTEGER_CLASS_P (CLASS) \
+ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
+ : (((((MODE) == XFmode ? 12 : GET_MODE_SIZE (MODE))) \
+ + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
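+
+/* For example, assuming UNITS_PER_WORD is 4 on a 32-bit target,
+   CLASS_MAX_NREGS (GENERAL_REGS, DImode) is 2 (an 8-byte value needs two
+   integer registers), while CLASS_MAX_NREGS (FLOAT_REGS, XFmode) is 1,
+   because a single x87 register always holds a whole value.  */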
+
+/* A C expression whose value is nonzero if pseudos that have been
+ assigned to registers of class CLASS would likely be spilled
+ because registers of CLASS are needed for spill registers.
+
+ The default value of this macro returns 1 if CLASS has exactly one
+ register and zero otherwise. On most machines, this default
+ should be used. Only define this macro to some other expression
+ if pseudos allocated by `local-alloc.c' end up in memory because
+ their hard registers were needed for spill registers. If this
+ macro returns nonzero for those classes, those pseudos will only
+ be allocated by `global.c', which knows how to reallocate the
+ pseudo to another register. If there would not be another
+ register available for reallocation, you should not change the
+ definition of this macro since the only effect of such a
+ definition would be to slow down register allocation. */
+
+#define CLASS_LIKELY_SPILLED_P(CLASS) \
+ (((CLASS) == AREG) \
+ || ((CLASS) == DREG) \
+ || ((CLASS) == CREG) \
+ || ((CLASS) == BREG) \
+ || ((CLASS) == AD_REGS) \
+ || ((CLASS) == SIREG) \
+ || ((CLASS) == DIREG) \
+ || ((CLASS) == FP_TOP_REG) \
+ || ((CLASS) == FP_SECOND_REG))
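+
+/* For example, AREG contains only %eax and AD_REGS only %eax/%edx, so
+   pseudos restricted to these classes are likely to be spilled whenever
+   those specific hard registers are needed for spills.  */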
+
+/* Return a class of registers that cannot change FROM mode to TO mode. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ ix86_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+ On the 386, the pushw instruction decrements the stack pointer by exactly
+ 2 regardless of the operand; there is no pushb.
+ But since the CIE data alignment factor on this arch is -4, we need to
+ make sure all stack pointer adjustments are a multiple of 4.
+
+ For the 64-bit ABI we round up to 8 bytes.
+ */
+
+#define PUSH_ROUNDING(BYTES) \
+ (TARGET_64BIT \
+ ? (((BYTES) + 7) & (-8)) \
+ : (((BYTES) + 3) & (-4)))
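+
+/* For example, PUSH_ROUNDING (1) is 4 on a 32-bit target and 8 under
+   TARGET_64BIT, and PUSH_ROUNDING (6) is 8 in both cases: byte counts are
+   rounded up to the alignment described above.  */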
+
+/* If defined, the maximum amount of space required for outgoing arguments will
+ be computed and placed into the variable
+ `current_function_outgoing_args_size'. No space will be pushed onto the
+ stack for each call; instead, the function prologue should increase the stack
+ frame size by this amount. */
+
+#define ACCUMULATE_OUTGOING_ARGS TARGET_ACCUMULATE_OUTGOING_ARGS
+
+/* If defined, a C expression whose value is nonzero when we want to use PUSH
+ instructions to pass outgoing arguments. */
+
+#define PUSH_ARGS (TARGET_PUSH_ARGS && !ACCUMULATE_OUTGOING_ARGS)
+
+/* We want the stack and args grow in opposite directions, even if
+ PUSH_ARGS is 0. */
+#define PUSH_ARGS_REVERSED 1
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define this macro if functions should assume that stack space has been
+ allocated for arguments even when their values are passed in registers.
+
+ The value of this macro is the size, in bytes, of the area reserved for
+ arguments passed in registers for the function represented by FNDECL.
+
+ This space can be allocated by the caller, or be a part of the
+ machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' says
+ which. */
+#define REG_PARM_STACK_SPACE(FNDECL) 0
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the 80386, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per-module basis. */
+
+#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \
+ ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE))
+
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ix86_function_value_regno_p (N)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) \
+ ix86_libcall_value (MODE)
+
+/* Define the size of the result block used for communication between
+ untyped_call and untyped_return. The block contains a DImode value
+ followed by the block used by fnsave and frstor. */
+
+#define APPLY_RESULT_SIZE (8+108)
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N)
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+
+typedef struct ix86_args {
+ int words; /* # words passed so far */
+ int nregs; /* # registers available for passing */
+ int regno; /* next available register number */
+ int fastcall; /* fastcall calling convention is used */
+ int sse_words; /* # sse words passed so far */
+ int sse_nregs; /* # sse registers available for passing */
+ int warn_sse; /* True when we want to warn about SSE ABI. */
+ int warn_mmx; /* True when we want to warn about MMX ABI. */
+ int sse_regno; /* next available sse register number */
+ int mmx_words; /* # mmx words passed so far */
+ int mmx_nregs; /* # mmx registers available for passing */
+ int mmx_regno; /* next available mmx register number */
+ int maybe_vaarg; /* true for calls to possibly variadic functions. */
+ int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should
+ be passed in SSE registers. Otherwise 0. */
+} CUMULATIVE_ARGS;
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL))
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+ function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED))
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
+ function_arg (&(CUM), (MODE), (TYPE), (NAMED))
+
+/* Implement `va_start' for varargs and stdarg. */
+#define EXPAND_BUILTIN_VA_START(VALIST, NEXTARG) \
+ ix86_va_start (VALIST, NEXTARG)
+
+#define TARGET_ASM_FILE_END ix86_file_end
+#define NEED_INDICATE_EXEC_STACK 0
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) x86_function_profiler (FILE, LABELNO)
+
+#define MCOUNT_NAME "_mcount"
+
+#define PROFILE_COUNT_REGISTER "edx"
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+/* Note: on the 386 it might be more efficient not to define this, since
+ we have to restore the stack pointer ourselves from the frame pointer
+ in order to use pop.  */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts. */
+
+/* On the 386, the trampoline contains two instructions:
+ mov #STATIC,ecx
+ jmp FUNCTION
+ The trampoline is generated entirely at runtime. The operand of JMP
+ is the address of FUNCTION relative to the instruction following the
+ JMP (which is 5 bytes long). */
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 23 : 10)
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
+ x86_initialize_trampoline ((TRAMP), (FNADDR), (CXT))
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ There are two registers that can always be eliminated on the i386.
+ The frame pointer and the arg pointer can be replaced by either the
+ hard frame pointer or the stack pointer, depending upon the
+ circumstances. The hard frame pointer is not used before reload and
+ so it is not eligible for elimination. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All other eliminations are valid. */
+
+#define CAN_ELIMINATE(FROM, TO) \
+ ((TO) == STACK_POINTER_REGNUM ? ! frame_pointer_needed : 1)
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = ix86_initial_elimination_offset ((FROM), (TO)))
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ ((REGNO) < STACK_POINTER_REGNUM \
+ || (REGNO >= FIRST_REX_INT_REG \
+ && (REGNO) <= LAST_REX_INT_REG) \
+ || ((unsigned) reg_renumber[(REGNO)] >= FIRST_REX_INT_REG \
+ && (unsigned) reg_renumber[(REGNO)] <= LAST_REX_INT_REG) \
+ || (unsigned) reg_renumber[(REGNO)] < STACK_POINTER_REGNUM)
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) <= STACK_POINTER_REGNUM \
+ || (REGNO) == ARG_POINTER_REGNUM \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || (REGNO >= FIRST_REX_INT_REG \
+ && (REGNO) <= LAST_REX_INT_REG) \
+ || ((unsigned) reg_renumber[(REGNO)] >= FIRST_REX_INT_REG \
+ && (unsigned) reg_renumber[(REGNO)] <= LAST_REX_INT_REG) \
+ || (unsigned) reg_renumber[(REGNO)] <= STACK_POINTER_REGNUM)
+
+#define REGNO_OK_FOR_SIREG_P(REGNO) \
+ ((REGNO) == 4 || reg_renumber[(REGNO)] == 4)
+#define REGNO_OK_FOR_DIREG_P(REGNO) \
+ ((REGNO) == 5 || reg_renumber[(REGNO)] == 5)
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+
+/* Non strict versions, pseudos are ok. */
+#define REG_OK_FOR_INDEX_NONSTRICT_P(X) \
+ (REGNO (X) < STACK_POINTER_REGNUM \
+ || (REGNO (X) >= FIRST_REX_INT_REG \
+ && REGNO (X) <= LAST_REX_INT_REG) \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+#define REG_OK_FOR_BASE_NONSTRICT_P(X) \
+ (REGNO (X) <= STACK_POINTER_REGNUM \
+ || REGNO (X) == ARG_POINTER_REGNUM \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || (REGNO (X) >= FIRST_REX_INT_REG \
+ && REGNO (X) <= LAST_REX_INT_REG) \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Strict versions, hard registers only */
+#define REG_OK_FOR_INDEX_STRICT_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#ifndef REG_OK_STRICT
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_NONSTRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_NONSTRICT_P (X)
+
+#else
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_STRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_STRICT_P (X)
+#endif
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS,
+ except for CONSTANT_ADDRESS_P which is usually machine-independent.
+
+ See legitimize_pic_address in i386.c for details as to what
+ constitutes a legitimate address when -fpic is used. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define CONSTANT_ADDRESS_P(X) constant_address_p (X)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X)
+
+#ifdef REG_OK_STRICT
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+do { \
+ if (legitimate_address_p ((MODE), (X), 1)) \
+ goto ADDR; \
+} while (0)
+
+#else
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+do { \
+ if (legitimate_address_p ((MODE), (X), 0)) \
+ goto ADDR; \
+} while (0)
+
+#endif
+
+/* If defined, a C expression to determine the base term of address X.
+ This macro is used in only one place: `find_base_term' in alias.c.
+
+ It is always safe for this macro to not be defined. It exists so
+ that alias analysis can understand machine-dependent addresses.
+
+ The typical use of this macro is to handle addresses containing
+ a label_ref or symbol_ref within an UNSPEC. */
+
+#define FIND_BASE_TERM(X) ix86_find_base_term (X)
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the 80386, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in a general reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in a general reg.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address in i386.c for details. */
+
+#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) \
+do { \
+ (X) = legitimize_address ((X), (OLDX), (MODE)); \
+ if (memory_address_p ((MODE), (X))) \
+ goto WIN; \
+} while (0)
+
+#define REWRITE_ADDRESS(X) rewrite_address (X)
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
+
+#define SYMBOLIC_CONST(X) \
+ (GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for.
+ On the 80386, only postdecrement and postincrement addresses depend on
+ the mode (the amount of decrement or increment being the length of the
+ operand). */
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \
+do { \
+ if (GET_CODE (ADDR) == POST_INC \
+ || GET_CODE (ADDR) == POST_DEC) \
+ goto LABEL; \
+} while (0)
+
+/* Max number of args passed in registers. If this is more than 3, we will
+ have problems with ebx (register #4), since it is a caller save register and
+ is also used as the pic register in ELF. So for now, don't allow more than
+ 3 registers to be passed in registers. */
+
+#define REGPARM_MAX (TARGET_64BIT ? 6 : 3)
+
+/* APPLE LOCAL regparmandstackparm */
+#define SSE_REGPARM_MAX (TARGET_64BIT ? 8 : (TARGET_MACHO ? 4 : (TARGET_SSE ? 3 : 0)))
+
+#define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
+
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE (!TARGET_64BIT || flag_pic ? SImode : DImode)
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Number of bytes moved into a data cache for a single prefetch operation. */
+#define PREFETCH_BLOCK ix86_cost->prefetch_block
+
+/* Number of prefetch operations that can be done in parallel. */
+#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 16
+
+/* MOVE_MAX_PIECES is the number of bytes at a time which we can
+ move efficiently, as opposed to MOVE_MAX which is the maximum
+ number of bytes we can move with a single instruction. */
+#define MOVE_MAX_PIECES (TARGET_64BIT ? 8 : 4)
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead.
+ Increasing the value will always make code faster, but eventually
+ incurs high cost in increased code size.
+
+ If you don't define this, a reasonable default is used. */
+
+#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
+
+/* If a clear memory operation would take CLEAR_RATIO or more simple
+ move-instruction sequences, we will do a clrmem or libcall instead. */
+
+#define CLEAR_RATIO (optimize_size ? 2 \
+ : ix86_cost->move_ratio > 6 ? 6 : ix86_cost->move_ratio)
+
+/* Define if shifts truncate the shift count
+ which implies one can omit a sign-extension or zero-extension
+ of a shift count. */
+/* On i386, shifts do truncate the count. But bit opcodes don't. */
+
+/* #define SHIFT_COUNT_TRUNCATED */
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* A macro to update M and UNSIGNEDP when an object whose type is
+ TYPE and which has the specified mode and signedness is to be
+ stored in a register. This macro is only called when TYPE is a
+ scalar type.
+
+ On i386 it is sometimes useful to promote HImode and QImode
+ quantities to SImode. The choice depends on target type. */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+do { \
+ if (((MODE) == HImode && TARGET_PROMOTE_HI_REGS) \
+ || ((MODE) == QImode && TARGET_PROMOTE_QI_REGS)) \
+ (MODE) = SImode; \
+} while (0)
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode (TARGET_64BIT ? DImode : SImode)
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* A C expression for the cost of moving data from a register in class FROM to
+ one in class TO. The classes are expressed using the enumeration values
+ such as `GENERAL_REGS'. A value of 2 is the default; other values are
+ interpreted relative to that.
+
+ It is not required that the cost always equal 2 when FROM is the same as TO;
+ on some machines it is expensive to move between registers if they are not
+ general registers. */
+
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ ix86_register_move_cost ((MODE), (CLASS1), (CLASS2))
+
+/* A C expression for the cost of moving data of mode M between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ If moving between registers and memory is more expensive than
+ between two registers, you should define this macro to express the
+ relative cost. */
+
+#define MEMORY_MOVE_COST(MODE, CLASS, IN) \
+ ix86_memory_move_cost ((MODE), (CLASS), (IN))
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that. */
+
+#define BRANCH_COST ix86_branch_cost
+
+/* Define this macro as a C expression which is nonzero if accessing
+ less than a word of memory (i.e. a `char' or a `short') is no
+ faster than accessing a word of memory, i.e., if such accesses
+ require more than one instruction or if there is no difference in
+ cost between byte and (aligned) word loads.
+
+ When this macro is not defined, the compiler will access a field by
+ finding the smallest containing object; when it is defined, a
+ fullword load will be used if alignment permits. Unless byte
+ accesses are faster than word accesses, using word accesses is
+ preferable since it may eliminate subsequent memory access if
+ subsequent accesses occur to other fields in the same word of the
+ structure, but to different bytes. */
+
+/* APPLE LOCAL 6131435 */
+#define SLOW_BYTE_ACCESS (!flag_apple_kext && !flag_mkernel && !TARGET_64BIT)
+
+/* Nonzero if access to memory by shorts is slow and undesirable. */
+#define SLOW_SHORT_ACCESS 0
+
+/* Define this macro to be the value 1 if unaligned accesses have a
+ cost many times greater than aligned accesses, for example if they
+ are emulated in a trap handler.
+
+ When this macro is nonzero, the compiler will act as if
+ `STRICT_ALIGNMENT' were nonzero when generating code for block
+ moves. This can cause significantly more instructions to be
+ produced. Therefore, do not set this macro nonzero if unaligned
+ accesses only add a cycle or two to the time for a memory access.
+
+ If the value of this macro is always zero, it need not be defined. */
+
+/* #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 0 */
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register.
+
+ Desirable on the 386 because a CALL with a constant address is
+ faster than one with a register address. */
+
+#define NO_FUNCTION_CSE
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison.
+
+ For floating-point equality comparisons, CCFPEQmode should be used.
+ VOIDmode should be used in all other cases.
+
+ For integer comparisons against zero, reduce to CCNOmode or CCZmode if
+ possible, to allow for more combinations. */
+
+#define SELECT_CC_MODE(OP, X, Y) ix86_cc_mode ((OP), (X), (Y))
+
+/* Return nonzero if MODE implies a floating point inequality can be
+ reversed. */
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* A C expression whose value is reversed condition code of the CODE for
+ comparison done in CC_MODE mode. */
+#define REVERSE_CONDITION(CODE, MODE) ix86_reverse_condition ((CODE), (MODE))
+
+
+/* Control the assembler format that we output, to the extent
+ this does not vary between assemblers. */
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+/* In order to refer to the first 8 regs as 32-bit regs, prefix an "e".
+ For non-floating-point regs, the following are the HImode names.
+
+ For float regs, the stack top is sometimes referred to as "%st(0)"
+ instead of just "%st". PRINT_OPERAND handles this with the "y" code. */
+
+#define HI_REGISTER_NAMES \
+{"ax","dx","cx","bx","si","di","bp","sp", \
+ "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)", \
+ "argp", "flags", "fpsr", "dirflag", "frame", \
+ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" , \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}
+
+#define REGISTER_NAMES HI_REGISTER_NAMES
+
+/* Table of additional register names to use in user input. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ { "eax", 0 }, { "edx", 1 }, { "ecx", 2 }, { "ebx", 3 }, \
+ { "esi", 4 }, { "edi", 5 }, { "ebp", 6 }, { "esp", 7 }, \
+ { "rax", 0 }, { "rdx", 1 }, { "rcx", 2 }, { "rbx", 3 }, \
+ { "rsi", 4 }, { "rdi", 5 }, { "rbp", 6 }, { "rsp", 7 }, \
+ { "al", 0 }, { "dl", 1 }, { "cl", 2 }, { "bl", 3 }, \
+ { "ah", 0 }, { "dh", 1 }, { "ch", 2 }, { "bh", 3 } }
+
+/* Note: we are omitting these since currently I don't know how to get
+ gcc to use them; they refer to the same registers as al and ax but
+ would need different numbers.  */
+
+#define QI_REGISTER_NAMES \
+{"al", "dl", "cl", "bl", "sil", "dil", "bpl", "spl",}
+
+/* These parallel the array above, and can be used to access bits 8:15
+ of regs 0 through 3. */
+
+#define QI_HIGH_REGISTER_NAMES \
+{"ah", "dh", "ch", "bh", }
+
+/* How to renumber registers for dbx and gdb. */
+
+#define DBX_REGISTER_NUMBER(N) \
+ (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
+
+extern int const dbx_register_map[FIRST_PSEUDO_REGISTER];
+extern int const dbx64_register_map[FIRST_PSEUDO_REGISTER];
+extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
+
+/* Before the prologue, RA is at 0(%esp). */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM))
+
+/* After the prologue, RA is at -4(AP) in the current frame. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, -UNITS_PER_WORD)) \
+ : gen_rtx_MEM (Pmode, plus_constant (FRAME, UNITS_PER_WORD)))
+
+/* PC is dbx register 8; let's use that column for RA. */
+#define DWARF_FRAME_RETURN_COLUMN (TARGET_64BIT ? 16 : 8)
+
+/* Before the prologue, the top of the frame is at 4(%esp). */
+#define INCOMING_FRAME_SP_OFFSET UNITS_PER_WORD
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 2)
+
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such, I
+ guess we'll have to see. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ asm_preferred_eh_data_format ((CODE), (GLOBAL))
+
+/* This is how to output an insn to push a register on the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \
+do { \
+ if (TARGET_64BIT) \
+ asm_fprintf ((FILE), "\tpush{q}\t%%r%s\n", \
+ reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \
+ else \
+ asm_fprintf ((FILE), "\tpush{l}\t%%e%s\n", reg_names[(REGNO)]); \
+} while (0)
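+
+/* For example, with the default AT&T dialect this emits "pushl %eax" for
+   hard register 0 on a 32-bit target and "pushq %rax" under TARGET_64BIT;
+   the +1 offset into reg_names skips the leading "r" of "r8".."r15" so the
+   %r prefix in the format string is not doubled.  */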
+
+/* This is how to output an insn to pop a register from the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) \
+do { \
+ if (TARGET_64BIT) \
+ asm_fprintf ((FILE), "\tpop{q}\t%%r%s\n", \
+ reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \
+ else \
+ asm_fprintf ((FILE), "\tpop{l}\t%%e%s\n", reg_names[(REGNO)]); \
+} while (0)
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ ix86_output_addr_vec_elt ((FILE), (VALUE))
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
+
+/* Under some conditions we need jump tables in the text section,
+ because the assembler cannot handle label differences between
+ sections. This is the case for x86_64 on Mach-O for example. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION \
+ (flag_pic && ((TARGET_MACHO && TARGET_64BIT) \
+ || (!TARGET_64BIT && !HAVE_AS_GOTOFF_IN_DATA)))
+
+/* Switch to init or fini section via SECTION_OP, emit a call to FUNC,
+ and switch back. For x86 we do this only to save a few bytes that
+ would otherwise be unused in the text section. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "call " USER_LABEL_PREFIX #FUNC "\n" \
+ TEXT_SECTION_ASM_OP);
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ Effect of various CODE letters is described in i386.c near
+ print_operand function. */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '*' || (CODE) == '+' || (CODE) == '&')
+
+#define PRINT_OPERAND(FILE, X, CODE) \
+ print_operand ((FILE), (X), (CODE))
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ print_operand_address ((FILE), (ADDR))
+
+#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \
+do { \
+ if (! output_addr_const_extra (FILE, (X))) \
+ goto FAIL; \
+} while (0);
+
+/* A letter which is not needed by the normal asm syntax, which
+ we can use for operand syntax in the extended asm.  */
+
+#define ASM_OPERAND_LETTER '#'
+#define RET return ""
+#define AT_SP(MODE) (gen_rtx_MEM ((MODE), stack_pointer_rtx))
+
+/* Which processor to schedule for. The cpu attribute defines a list that
+ mirrors this list, so changes to i386.md must be made at the same time. */
+
+enum processor_type
+{
+ PROCESSOR_I386, /* 80386 */
+ PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
+ PROCESSOR_PENTIUM,
+ PROCESSOR_PENTIUMPRO,
+ PROCESSOR_K6,
+ PROCESSOR_ATHLON,
+ PROCESSOR_PENTIUM4,
+ PROCESSOR_K8,
+ PROCESSOR_NOCONA,
+ /* APPLE LOCAL mainline */
+ PROCESSOR_CORE2,
+ PROCESSOR_GENERIC32,
+ PROCESSOR_GENERIC64,
+ PROCESSOR_max
+};
+
+extern enum processor_type ix86_tune;
+extern enum processor_type ix86_arch;
+
+enum fpmath_unit
+{
+ FPMATH_387 = 1,
+ FPMATH_SSE = 2
+};
+
+extern enum fpmath_unit ix86_fpmath;
+
+enum tls_dialect
+{
+ TLS_DIALECT_GNU,
+ TLS_DIALECT_GNU2,
+ TLS_DIALECT_SUN
+};
+
+extern enum tls_dialect ix86_tls_dialect;
+
+enum cmodel {
+ CM_32, /* The traditional 32-bit ABI. */
+ CM_SMALL, /* Assumes all code and data fits in the low 31 bits. */
+ CM_KERNEL, /* Assumes all code and data fits in the high 31 bits. */
+ CM_MEDIUM, /* Assumes code fits in the low 31 bits; data unlimited. */
+ CM_LARGE, /* No assumptions. */
+ CM_SMALL_PIC, /* Assumes code+data+got/plt fits in a 31 bit region. */
+ CM_MEDIUM_PIC /* Assumes code+got/plt fits in a 31 bit region. */
+};
+
+extern enum cmodel ix86_cmodel;
+
+/* Size of the RED_ZONE area. */
+#define RED_ZONE_SIZE 128
+/* Reserved area of the red zone for temporaries. */
+#define RED_ZONE_RESERVE 8
+
+enum asm_dialect {
+ ASM_ATT,
+ ASM_INTEL
+};
+
+extern enum asm_dialect ix86_asm_dialect;
+/* APPLE LOCAL begin regparmandstackparm */
+extern void ix86_darwin_handle_regparmandstackparm (tree fndecl);
+extern void ix86_darwin_redirect_calls(void);
+/* APPLE LOCAL end regparmandstackparm */
+
+extern unsigned int ix86_preferred_stack_boundary;
+/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
+extern unsigned int ix86_save_preferred_stack_boundary;
+/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
+extern int ix86_branch_cost, ix86_section_threshold;
+
+/* Smallest class containing REGNO. */
+extern enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER];
+
+extern rtx ix86_compare_op0; /* operand 0 for comparisons */
+extern rtx ix86_compare_op1; /* operand 1 for comparisons */
+extern rtx ix86_compare_emitted;
+
+/* To properly truncate FP values into integers, we need to set i387 control
+ word. We can't emit proper mode switching code before reload, as spills
+ generated by reload may truncate values incorrectly, but we still can avoid
+ redundant computation of new control word by the mode switching pass.
+ The fldcw instructions are still emitted redundantly, but this is probably
+ not going to be a noticeable problem, as most CPUs have a fast path for
+ the sequence.
+
+ The machinery is to emit simple truncation instructions and split them
+ before reload to instructions having USEs of two memory locations that
+ are filled by this code to old and new control word.
+
+ A post-reload pass may later be used to eliminate the redundant fldcw if
+ needed. */
+
+enum ix86_entity
+{
+ I387_TRUNC = 0,
+ I387_FLOOR,
+ I387_CEIL,
+ I387_MASK_PM,
+ MAX_386_ENTITIES
+};
+
+enum ix86_stack_slot
+{
+ SLOT_VIRTUAL = 0,
+ SLOT_TEMP,
+ SLOT_CW_STORED,
+ SLOT_CW_TRUNC,
+ SLOT_CW_FLOOR,
+ SLOT_CW_CEIL,
+ SLOT_CW_MASK_PM,
+ MAX_386_STACK_LOCALS
+};
+
+/* Define this macro if the port needs extra instructions inserted
+ for mode switching in an optimizing compilation. */
+
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
+ ix86_optimize_mode_switching[(ENTITY)]
+
+/* If you define `OPTIMIZE_MODE_SWITCHING', you have to define this as
+ initializer for an array of integers. Each initializer element N
+ refers to an entity that needs mode switching, and specifies the
+ number of different modes that might need to be set for this
+ entity. The position of the initializer in the initializer -
+ starting counting at zero - determines the integer that is used to
+ refer to the mode-switched entity in question. */
+
+#define NUM_MODES_FOR_MODE_SWITCHING \
+ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+
+/* ENTITY is an integer specifying a mode-switched entity. If
+ `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
+ return an integer value not larger than the corresponding element
+ in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
+ must be switched into prior to the execution of INSN. */
+
+#define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
+
+/* This macro specifies the order in which modes for ENTITY are
+ processed. 0 is the highest priority. */
+
+#define MODE_PRIORITY_TO_MODE(ENTITY, N) (N)
+
+/* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
+ is the set of hard registers live at the point where the insn(s)
+ are to be inserted. */
+
+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+ ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
+ ? emit_i387_cw_initialization (MODE), 0 \
+ : 0)
+
+
+/* Avoid renaming of stack registers, as doing so in combination with
+ scheduling just increases the number of registers live at a time and
+ in turn the number of fxch instructions needed.
+
+ ??? Maybe Pentium chips benefit from renaming; someone can try.... */
+
+#define HARD_REGNO_RENAME_OK(SRC, TARGET) \
+ ((SRC) < FIRST_STACK_REG || (SRC) > LAST_STACK_REG)
+
+
+#define DLL_IMPORT_EXPORT_PREFIX '#'
+
+#define FASTCALL_PREFIX '@'
+
+struct machine_function GTY(())
+{
+ struct stack_local_entry *stack_locals;
+ const char *some_ld_name;
+ rtx force_align_arg_pointer;
+ int save_varrargs_registers;
+ int accesses_prev_frame;
+ int optimize_mode_switching[MAX_386_ENTITIES];
+ /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to
+ determine the style used. */
+ int use_fast_prologue_epilogue;
+ /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
+ for. */
+ int use_fast_prologue_epilogue_nregs;
+ /* If true, the current function needs the default PIC register, not
+ an alternate register (on x86) and must not use the red zone (on
+ x86_64), even if it's a leaf function. We don't want the
+ function to be regarded as non-leaf because TLS calls need not
+ affect register allocation. This flag is set when a TLS call
+ instruction is expanded within a function, and never reset, even
+ if all such instructions are optimized away. Use the
+ ix86_current_function_calls_tls_descriptor macro for a better
+ approximation. */
+ int tls_descriptor_call_expanded_p;
+};
+
+#define ix86_stack_locals (cfun->machine->stack_locals)
+#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
+#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_tls_descriptor_calls_expanded_in_cfun \
+ (cfun->machine->tls_descriptor_call_expanded_p)
+/* Since tls_descriptor_call_expanded is not cleared, even if all TLS
+ calls are optimized away, we try to detect cases in which they were
+ optimized away. Since such instructions contain a (use (reg REG_SP)),
+ we can verify whether any such instruction is live by testing that
+ REG_SP is live. */
+#define ix86_current_function_calls_tls_descriptor \
+ (ix86_tls_descriptor_calls_expanded_in_cfun && regs_ever_live[SP_REG])
+
+/* Control behavior of x86_file_start. */
+#define X86_FILE_START_VERSION_DIRECTIVE false
+#define X86_FILE_START_FLTUSED false
+
+/* APPLE LOCAL begin CW asm blocks */
+#undef TARGET_IASM_EXTRA_INFO
+#define TARGET_IASM_EXTRA_INFO \
+ char mod[3]; \
+ bool as_immediate; \
+ bool as_offset; \
+ bool pseudo;
+
+#define TARGET_IASM_REORDER_ARG(OPCODE, NEWARGNUM, NUM_ARGS, ARGNUM) \
+ do { \
+ /* If we are outputting AT&T style assembly language, the argument \
+ numbering is reversed. */ \
+ if (iasm_x86_needs_swapping (opcode)) \
+ NEWARGNUM = NUM_ARGS - ARGNUM + 1; \
+ } while (0)
+
+#define IASM_SYNTH_CONSTRAINTS(R, ARGNUM, NUM_ARGS, DB) \
+ do { \
+ /* On x86, operand 2 or 3 can be left out and the assembler will deal with it. \
+ \
+ Take for example an opcode: \
+ \
+ opcode r m i \
+ \
+ We allow: \
+ \
+ opcode r mi \
+ \
+ when we have only 2 operands. */ \
+ if (R \
+ && ARGNUM == 2 \
+ && NUM_ARGS == 2 \
+ && R < &DB[sizeof(DB) / sizeof (DB[0]) - 1] \
+ && strcmp (R[1].opcode, R->opcode) == 0 \
+ && R[1].argnum == 3) \
+ { \
+ tree t; \
+ size_t len = strlen (r->constraint) + strlen (r[1].constraint) + 1; \
+ char *p = alloca (len); \
+ \
+ sprintf(p, "%s%s", r->constraint, r[1].constraint); \
+ t = build_string (len, p); \
+ return TREE_STRING_POINTER (t); \
+ } \
+ } while (0)
+
+#define TARGET_IASM_PRINT_OP(BUF, ARG, ARGNUM, USES, MUST_BE_REG, MUST_NOT_BE_REG, E) \
+ iasm_print_op (BUF, ARG, ARGNUM, USES, MUST_BE_REG, MUST_NOT_BE_REG, E)
+
+extern tree iasm_x86_canonicalize_operands (const char **, tree, void *);
+/* On x86, we can rewrite opcodes, change argument ordering and so on... */
+#define IASM_CANONICALIZE_OPERANDS(OPCODE, NEW_OPCODE, IARGS, E) \
+ do { \
+ NEW_OPCODE = OPCODE; \
+ IARGS = iasm_x86_canonicalize_operands (&NEW_OPCODE, IARGS, E); \
+ } while (0)
+
+#define IASM_SEE_OPCODE(YYCHAR, T) \
+ /* If we see an int, arrange to see it as an identifier (opcode), \
+ not as a type. */ \
+ ((YYCHAR == TYPESPEC \
+ && C_RID_CODE (T) == RID_INT) \
+ ? IDENTIFIER : YYCHAR)
+
+/* Return true iff the ID is a prefix for an instruction. */
+
+#define IASM_IS_PREFIX(ID) \
+ do { \
+ const char *myname = IDENTIFIER_POINTER (ID); \
+ if (strcasecmp (myname, "lock") == 0 \
+ || strcasecmp (myname, "rep") == 0 \
+ || strcasecmp (myname, "repe") == 0 \
+ || strcasecmp (myname, "repz") == 0 \
+ || strcasecmp (myname, "repne") == 0 \
+ || strcasecmp (myname, "repnz") == 0) \
+ return true; \
+ } while (0)
+
+#define IASM_PRINT_PREFIX(BUF, PREFIX_LIST) iasm_x86_print_prefix(BUF, PREFIX_LIST)
+
+#define IASM_IMMED_PREFIX(E, BUF) \
+ do { \
+ if (!E->pseudo && ! E->as_immediate) \
+ sprintf (BUF + strlen (BUF), "$"); \
+ } while (0)
+
+#define IASM_OFFSET_PREFIX(E, BUF) \
+ do { \
+ if (E->as_offset) \
+ sprintf (BUF + strlen (BUF), "$"); \
+ } while (0)
+
+/* We can't yet expose ST(x) to reg-stack.c, don't try. */
+#define IASM_HIDE_REG(R) FP_REGNO_P (R)
+
+#define IASM_SEE_IMMEDIATE(E) \
+ E->as_immediate = true
+
+#define IASM_SEE_NO_IMMEDIATE(E) \
+ E->as_immediate = false
+
+/* Table of instructions that need extra constraints. Keep this table sorted. */
+#undef TARGET_IASM_OP_CONSTRAINT
+#define TARGET_IASM_OP_CONSTRAINT \
+ { "adc", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64 },\
+ { "adc", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64 },\
+ { "add", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64 },\
+ { "add", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\
+ { "addpd", 1, "+x"}, \
+ { "addpd", 2, "xm"}, \
+ { "addps", 1, "+x"}, \
+ { "addps", 2, "xm"}, \
+ { "addsd", 1, "+x"}, \
+ { "addsd", 2, "xm"}, \
+ { "addss", 1, "+x"}, \
+ { "addss", 2, "xm"}, \
+ { "addsubpd", 1, "+x"}, \
+ { "addsubpd", 2, "xm"}, \
+ { "addsubps", 1, "+x"}, \
+ { "addsubps", 2, "xm"}, \
+ { "and", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\
+ { "and", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\
+ { "andnpd", 1, "+x"}, \
+ { "andnpd", 2, "xm"}, \
+ { "andnps", 1, "+x"}, \
+ { "andnps", 2, "xm"}, \
+ { "andpd", 1, "+x"}, \
+ { "andpd", 2, "xm"}, \
+ { "andps", 1, "+x"}, \
+ { "andps", 2, "xm"}, \
+ { NX "arpl", 1, "+" rm16}, \
+ { NX "arpl", 2, r16}, \
+ { "bound", 1, U("r")}, \
+ { "bound", 2, U("m")}, \
+ { "bsf", 1, "=r"}, \
+ { "bsf", 2, "rm"}, \
+ { "bsr", 1, "=r"}, \
+ { "bsr", 2, "rm"}, \
+ { "bt", 1, "rm"}, \
+ { "bt", 2, "ri"}, \
+ { "btc", 1, "rm"}, \
+ { "btc", 2, "ri"}, \
+ { "btr", 1, "rm"}, \
+ { "btr", 2, "ri"}, \
+ { "bts", 1, "rm"}, \
+ { "bts", 2, "ri"}, \
+ { NX "call", 1, "rsm"}, \
+ { "clflush", 1, "=m"}, \
+ { "cmova", 1, r16 "," r32 C R64},\
+ { "cmova", 2, rm16 "," rm32 C RM64},\
+ { "cmovae", 2, "rm"}, \
+ { "cmovb", 2, "rm"}, \
+ { "cmovbe", 2, "rm"}, \
+ { "cmovc", 2, "rm"}, \
+ { "cmove", 2, "rm"}, \
+ { "cmovg", 2, "rm"}, \
+ { "cmovge", 2, "rm"}, \
+ { "cmovl", 2, "rm"}, \
+ { "cmovle", 2, "rm"}, \
+ { "cmovna", 2, "rm"}, \
+ { "cmovnae", 2, "rm"}, \
+ { "cmovnb", 2, "rm"}, \
+ { "cmovnbe", 2, "rm"}, \
+ { "cmovnc", 2, "rm"}, \
+ { "cmovne", 2, "rm"}, \
+ { "cmovng", 2, "rm"}, \
+ { "cmovnge", 2, "rm"}, \
+ { "cmovnl", 2, "rm"}, \
+ { "cmovnle", 2, "rm"}, \
+ { "cmovno", 2, "rm"}, \
+ { "cmovnp", 2, "rm"}, \
+ { "cmovns", 2, "rm"}, \
+ { "cmovnz", 2, "rm"}, \
+ { "cmovo", 2, "rm"}, \
+ { "cmovp", 2, "rm"}, \
+ { "cmovpe", 2, "rm"}, \
+ { "cmovpo", 2, "rm"}, \
+ { "cmovs", 2, "rm"}, \
+ { "cmovz", 2, "rm"}, \
+ { "cmp", 1, rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\
+ { "cmp", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\
+ { "cmpeqpd", 1, "=x"}, \
+ { "cmpeqpd", 2, "xm"}, \
+ { "cmpeqps", 1, "=x"}, \
+ { "cmpeqps", 2, "xm"}, \
+ { "cmpeqsd", 1, "=x"}, \
+ { "cmpeqsd", 2, "xm"}, \
+ { "cmpeqss", 1, "=x"}, \
+ { "cmpeqss", 2, "xm"}, \
+ { "cmplepd", 1, "=x"}, \
+ { "cmplepd", 2, "xm"}, \
+ { "cmpleps", 1, "=x"}, \
+ { "cmpleps", 2, "xm"}, \
+ { "cmplesd", 1, "=x"}, \
+ { "cmplesd", 2, "xm"}, \
+ { "cmpless", 1, "=x"}, \
+ { "cmpless", 2, "xm"}, \
+ { "cmpltpd", 1, "=x"}, \
+ { "cmpltpd", 2, "xm"}, \
+ { "cmpltps", 1, "=x"}, \
+ { "cmpltps", 2, "xm"}, \
+ { "cmpltsd", 1, "=x"}, \
+ { "cmpltsd", 2, "xm"}, \
+ { "cmpltss", 1, "=x"}, \
+ { "cmpltss", 2, "xm"}, \
+ { "cmpneqpd", 1, "=x"}, \
+ { "cmpneqpd", 2, "xm"}, \
+ { "cmpneqps", 1, "=x"}, \
+ { "cmpneqps", 2, "xm"}, \
+ { "cmpneqsd", 1, "=x"}, \
+ { "cmpneqsd", 2, "xm"}, \
+ { "cmpneqss", 1, "=x"}, \
+ { "cmpneqss", 2, "xm"}, \
+ { "cmpnlepd", 1, "=x"}, \
+ { "cmpnlepd", 2, "xm"}, \
+ { "cmpnleps", 1, "=x"}, \
+ { "cmpnleps", 2, "xm"}, \
+ { "cmpnlesd", 1, "=x"}, \
+ { "cmpnlesd", 2, "xm"}, \
+ { "cmpnless", 1, "=x"}, \
+ { "cmpnless", 2, "xm"}, \
+ { "cmpnltpd", 1, "=x"}, \
+ { "cmpnltpd", 2, "xm"}, \
+ { "cmpnltps", 1, "=x"}, \
+ { "cmpnltps", 2, "xm"}, \
+ { "cmpnltsd", 1, "=x"}, \
+ { "cmpnltsd", 2, "xm"}, \
+ { "cmpnltss", 1, "=x"}, \
+ { "cmpnltss", 2, "xm"}, \
+ { "cmpordpd", 1, "=x"}, \
+ { "cmpordpd", 2, "xm"}, \
+ { "cmpordps", 1, "=x"}, \
+ { "cmpordps", 2, "xm"}, \
+ { "cmpordsd", 1, "=x"}, \
+ { "cmpordsd", 2, "xm"}, \
+ { "cmpordss", 1, "=x"}, \
+ { "cmpordss", 2, "xm"}, \
+ { "cmppd", 1, "=x"}, \
+ { "cmppd", 2, "xm"}, \
+ { "cmppd", 3, "i"}, \
+ { "cmpps", 1, "=x"}, \
+ { "cmpps", 2, "xm"}, \
+ { "cmpps", 3, "i"}, \
+ { "cmpsd", 1, "=x"}, \
+ { "cmpsd", 2, "xm"}, \
+ { "cmpsd", 3, "i"}, \
+ { "cmpss", 1, "=x"}, \
+ { "cmpss", 2, "xm"}, \
+ { "cmpss", 3, "i"}, \
+ { "cmpunordpd", 1, "=x"}, \
+ { "cmpunordpd", 2, "xm"}, \
+ { "cmpunordps", 1, "=x"}, \
+ { "cmpunordps", 2, "xm"}, \
+ { "cmpunordsd", 1, "=x"}, \
+ { "cmpunordsd", 2, "xm"}, \
+ { "cmpunordss", 1, "=x"}, \
+ { "cmpunordss", 2, "xm"}, \
+ { "cmpxchg", 1, "+mr"}, \
+ { "cmpxchg", 2, "r"}, \
+ { "comisd", 1, "x"}, \
+ { "comisd", 2, "xm"}, \
+ { "comiss", 1, "x"}, \
+ { "comiss", 2, "xm"}, \
+ { "cvtdq2pd", 1, "=x"}, \
+ { "cvtdq2pd", 2, "xm"}, \
+ { "cvtdq2ps", 1, "=x"}, \
+ { "cvtdq2ps", 2, "xm"}, \
+ { "cvtpd2dq", 1, "=x"}, \
+ { "cvtpd2dq", 2, "xm"}, \
+ { "cvtpd2pi", 1, "=y"}, \
+ { "cvtpd2pi", 2, "xm"}, \
+ { "cvtpd2ps", 1, "=x"}, \
+ { "cvtpd2ps", 2, "xm"}, \
+ { "cvtpi2pd", 1, "=x"}, \
+ { "cvtpi2pd", 2, "ym"}, \
+ { "cvtpi2ps", 1, "=x"}, \
+ { "cvtpi2ps", 2, "ym"}, \
+ { "cvtps2dq", 1, "=x"}, \
+ { "cvtps2dq", 2, "xm"}, \
+ { "cvtps2pd", 1, "=x"}, \
+ { "cvtps2pd", 2, "xm"}, \
+ { "cvtps2pi", 1, "=y"}, \
+ { "cvtps2pi", 2, "xm"}, \
+ { "cvtsd2si", 1, "=" r32R64}, \
+ { "cvtsd2si", 2, "xm"}, \
+ { "cvtsd2ss", 1, "=x"}, \
+ { "cvtsd2ss", 2, "xm"}, \
+ { "cvtsi2sd", 1, "=x"}, \
+ { "cvtsi2sd", 2, rm32RM64}, \
+ { "cvtsi2ss", 1, "=x"}, \
+ { "cvtsi2ss", 2, rm32RM64}, \
+ { "cvtss2sd", 1, "=x"}, \
+ { "cvtss2sd", 2, "xm"}, \
+ { "cvtss2si", 1, "=r"}, \
+ { "cvtss2si", 2, "xm"}, \
+ { "cvttpd2dq", 1, "=x"}, \
+ { "cvttpd2dq", 2, "xm"}, \
+ { "cvttpd2pi", 1, "=y"}, \
+ { "cvttpd2pi", 2, "xm"}, \
+ { "cvttps2dq", 1, "=x"}, \
+ { "cvttps2dq", 2, "xm"}, \
+ { "cvttps2pi", 1, "=y"}, \
+ { "cvttps2pi", 2, "xm"}, \
+ { "cvttsd2si", 1, "=r"}, \
+ { "cvttsd2si", 2, "xm"}, \
+ { "cvttss2si", 1, "=r"}, \
+ { "cvttss2si", 2, "xm"}, \
+ { "dec", 1, "+" rm8rm16rm32RM64},\
+ { "div", 1, rm8rm16rm32}, \
+ { "divpd", 1, "+x"}, \
+ { "divpd", 2, "xm"}, \
+ { "divps", 1, "+x"}, \
+ { "divps", 2, "xm"}, \
+ { "divsd", 1, "+x"}, \
+ { "divsd", 2, "xm"}, \
+ { "divss", 1, "+x"}, \
+ { "divss", 2, "xm"}, \
+ { "enter", 1, "i"}, \
+ { "enter", 2, "i"}, \
+ { "fadd", 1, "+t,f,@"}, \
+ { "fadd", 2, "f,t," m32fpm64fp},\
+ { "faddp", 1, "+f"}, \
+ { "faddp", 2, "t"}, \
+ { "fbld", 1, "m"}, \
+ { "fbstp", 1, "m"}, \
+ { "fcmovb", 1, "=t"}, \
+ { "fcmovb", 2, "f"}, \
+ { "fcmovbe", 1, "=t"}, \
+ { "fcmovbe", 2, "f"}, \
+ { "fcmove", 1, "=t"}, \
+ { "fcmove", 2, "f"}, \
+ { "fcmovnb", 1, "=t"}, \
+ { "fcmovnb", 2, "f"}, \
+ { "fcmovnbe", 1, "=t"}, \
+ { "fcmovnbe", 2, "f"}, \
+ { "fcmovne", 1, "=t"}, \
+ { "fcmovne", 2, "f"}, \
+ { "fcmovnu", 1, "=t"}, \
+ { "fcmovnu", 2, "f"}, \
+ { "fcmovu", 1, "=t"}, \
+ { "fcmovu", 2, "f"}, \
+ { "fcom", 1, "f" m32fpm64fp}, \
+ { "fcomi", 1, "t"}, \
+ { "fcomi", 2, "f"}, \
+ { "fcomip", 1, "t"}, \
+ { "fcomip", 2, "f"}, \
+ { "fcomp", 1, "f" m32fpm64fp},\
+ { "fdiv", 1, "+t,f,@"}, \
+ { "fdiv", 2, "f,t," m32fpm64fp},\
+ { "fdivp", 1, "+f"}, \
+ { "fdivp", 2, "t"}, \
+ { "fdivr", 1, "+t,@"}, \
+ { "fdivr", 2, "f," m32fpm64fp},\
+ { "fdivrp", 1, "+f"}, \
+ { "fdivrp", 2, "t"}, \
+ { "ffree", 1, "f"}, \
+ { "fiadd", 1, m16m32}, \
+ { "ficom", 1, m16m32}, \
+ { "ficomp", 1, m16m32}, \
+ { "fidiv", 1, m16m32}, \
+ { "fidivr", 1, m16m32}, \
+ { "fild", 1, m16m32m64}, \
+ { "fimul", 1, m16m32}, \
+ { "fist", 1, "=" m16m32}, \
+ { "fistp", 1, "=" m16m32m64}, \
+ { "fisttp", 1, "=" m16m32m64},\
+ { "fisub", 1, m16m32}, \
+ { "fisubr", 1, m16m32}, \
+ { "fld", 1, "f" m32fpm64fpm80fp},\
+ { "fldcw", 1, m16}, \
+ { "fldenv", 1, "m"}, \
+ { "fldt", 1, "m"}, \
+ { "fmul", 1, "=f,t,@"}, \
+ { "fmul", 2, "t,f," m32fpm64fp},\
+ { "fmulp", 1, "=f"}, \
+ { "fmulp", 2, "t"}, \
+ { "fnsave", 1, "=m"}, \
+ { "fnstcw", 1, "m"}, \
+ { "fnstenv", 1, "m"}, \
+ { "fnstsw", 1, "ma"}, \
+ { "frstor", 1, "m"}, \
+ { "fsave", 1, "=m"}, \
+ { "fst", 1, "=f" m32fpm64fp}, \
+ { "fstcw", 1, "=m"}, \
+ { "fstenv", 1, "=m"}, \
+ { "fstp", 1, "=f" m32fpm64fpm80fp},\
+ { "fstsw", 1, "=ma"}, \
+ { "fsub", 1, "=f,t,@"}, \
+ { "fsub", 2, "t,f," m32fpm64fp},\
+ { "fsubr", 1, "=f,t," m32fpm64fp},\
+ { "fsubr", 2, "t,f,@"}, \
+ { "fucom", 1, "f"}, \
+ { "fucomi", 1, "t"}, \
+ { "fucomi", 2, "f"}, \
+ { "fucomip", 1, "t"}, \
+ { "fucomip", 2, "f"}, \
+ { "fucomp", 1, "f"}, \
+ { "fxch", 1, "+f" }, \
+ { "fxrstor", 1, "m"}, \
+ { "fxsave", 1, "=m"}, \
+ { "haddpd", 1, "+x"}, \
+ { "haddpd", 2, "xm"}, \
+ { "haddps", 1, "+x"}, \
+ { "haddps", 2, "xm"}, \
+ { "hsubpd", 1, "+x"}, \
+ { "hsubpd", 2, "xm"}, \
+ { "hsubps", 1, "+x"}, \
+ { "hsubps", 2, "xm"}, \
+ { "idiv", 1, rm8rm16rm32RM64},\
+ { "imul", 1, "+r"}, \
+ { "imul", 2, "rm"}, \
+ { "imul", 3, "i"}, \
+ { "in", 1, "=a"}, \
+ { "in", 2, "i"}, \
+ { "inc", 1, "+" rm8rm16rm32RM64},\
+ { NX "ins", 1, "=" m8m16m32}, \
+ { NX "ins", 2, "d"}, \
+ { "int", 1, "i"}, \
+ { "invlpg", 1, "m"}, \
+ { "ja", 1, "s"}, \
+ { "jae", 1, "s"}, \
+ { "jb", 1, "s"}, \
+ { "jbe", 1, "s"}, \
+ { "jc", 1, "s"}, \
+ { NX "jcxz", 1, rel8}, \
+ { "je", 1, "s"}, \
+ { "jecxz", 1, rel8}, \
+ { "jg", 1, "s"}, \
+ { "jge", 1, "s"}, \
+ { "jl", 1, "s"}, \
+ { "jle", 1, "s"}, \
+ { NX "jmp", 1, "s" rm32}, \
+ { "jna", 1, "s"}, \
+ { "jnae", 1, "s"}, \
+ { "jnb", 1, "s"}, \
+ { "jnc", 1, "s"}, \
+ { "jne", 1, "s"}, \
+ { "jng", 1, "s"}, \
+ { "jnge", 1, "s"}, \
+ { "jnl", 1, "s"}, \
+ { "jnle", 1, "s"}, \
+ { "jno", 1, "s"}, \
+ { "jnp", 1, "s"}, \
+ { "jns", 1, "s"}, \
+ { "jnz", 1, "s"}, \
+ { "jo", 1, "s"}, \
+ { "jp", 1, "s"}, \
+ { "jpe", 1, "s"}, \
+ { "jpo", 1, "s"}, \
+ { "js", 1, "s"}, \
+ { "jz", 1, "s"}, \
+ { "lar", 1, "=r"}, \
+ { "lar", 2, "rm"}, \
+ { "lddqu", 1, "=x"}, \
+ { "lddqu", 2, "m"}, \
+ { "ldmxcsr", 1, "m"}, \
+ { NX "lds", 1, "=" r16 "," r32 C R64},\
+ { NX "lds", 2, m16 "," m32 C M64},\
+ { "lea", 1, "=r"}, \
+ { "lea", 2, "m"}, \
+ { NX "les", 1, "=" r16 "," r32 C R64},\
+ { NX "les", 2, m16 "," m32 C M64},\
+ { "lfs", 1, "=" r16 "," r32 C R64},\
+ { "lfs", 2, m16 "," m32 C M64},\
+ { "lgdt", 1, "m"}, \
+ { "lgs", 1, "=" r16 "," r32 C R64},\
+ { "lgs", 2, m16 "," m32 C M64},\
+ { "lidt", 1, "m"}, \
+ { "lldt", 1, rm16}, \
+ { "lmsw", 1, "m"}, \
+ { NX "lods", 1, m8m16m32M64}, \
+ { "loop", 1, rel8}, \
+ { "loope", 1, rel8}, \
+ { "loopne", 1, rel8}, \
+ { "loopnz", 1, rel8}, \
+ { "loopz", 1, rel8}, \
+ { "lsl", 1, "=" r16 "," r32}, \
+ { "lsl", 2, rm16 "," rm32}, \
+ { "lss", 1, "=" r16 "," r32 C R64},\
+ { "lss", 2, m16 "," m32 C M64},\
+ { "ltr", 1, rm16}, \
+ { "maskmovdqu", 1, "x"}, \
+ { "maskmovdqu", 2, "x"}, \
+ { "maskmovq", 1, "y"}, \
+ { "maskmovq", 2, "y"}, \
+ { "maxpd", 1, "+x"}, \
+ { "maxpd", 2, "xm"}, \
+ { "maxps", 1, "+x"}, \
+ { "maxps", 2, "xm"}, \
+ { "maxsd", 1, "+x"}, \
+ { "maxsd", 2, "xm"}, \
+ { "maxss", 1, "+x"}, \
+ { "maxss", 2, "xm"}, \
+ { "minpd", 1, "+x"}, \
+ { "minpd", 2, "xm"}, \
+ { "minps", 1, "+x"}, \
+ { "minps", 2, "xm"}, \
+ { "minsd", 1, "+x"}, \
+ { "minsd", 2, "xm"}, \
+ { "minss", 1, "+x"}, \
+ { "minss", 2, "xm"}, \
+ { "mov", 1, "=" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64}, \
+ { "mov", 2, ri8 "," ri16 "," ri32 C RI64 "," rmi8 "," rmi16 "," rmi32 C RMI64}, \
+ { "movapd", 1, "=x,xm"}, \
+ { "movapd", 2, "xm,x"}, \
+ { "movaps", 1, "=x,xm"}, \
+ { "movaps", 2, "xm,x"}, \
+ { "movd", 1, "=rm,x,y,rm"}, \
+ { "movd", 2, "x,rm,rm,y"}, \
+ { "movddup", 1, "=x"}, \
+ { "movddup", 2, "xm"}, \
+ { "movdq2q", 1, "=y"}, \
+ { "movdq2q", 2, "x"}, \
+ { "movdqa", 1, "=x"}, \
+ { "movdqa", 2, "xm"}, \
+ { "movdqu", 1, "=x"}, \
+ { "movdqu", 2, "xm"}, \
+ { "movhlps", 1, "=x"}, \
+ { "movhlps", 2, "x"}, \
+ { "movhpd", 1, "=x,m"}, \
+ { "movhpd", 2, "m,x"}, \
+ { "movhps", 1, "=x,m"}, \
+ { "movhps", 2, "m,x"}, \
+ { "movlhps", 1, "=x"}, \
+ { "movlhps", 2, "x"}, \
+ { "movlpd", 1, "=x,m"}, \
+ { "movlpd", 2, "m,x"}, \
+ { "movlps", 1, "=x,m"}, \
+ { "movlps", 2, "m,x"}, \
+ { "movmskpd", 1, "=r"}, \
+ { "movmskpd", 2, "x"}, \
+ { "movmskps", 1, "=r"}, \
+ { "movmskps", 2, "x"}, \
+ { "movntdq", 1, "=m"}, \
+ { "movntdq", 2, "x"}, \
+ { "movnti", 1, "=m"}, \
+ { "movnti", 2, "r"}, \
+ { "movntpd", 1, "=m"}, \
+ { "movntpd", 2, "x"}, \
+ { "movntps", 1, "=m"}, \
+ { "movntps", 2, "x"}, \
+ { "movntq", 1, "=m"}, \
+ { "movntq", 2, "y"}, \
+ { "movq", 1, "=x,m,y,m"}, \
+ { "movq", 2, "xm,x,ym,y"}, \
+ { "movq2dq", 1, "=x"}, \
+ { "movq2dq", 2, "y"}, \
+ { "movs", 1, "=" m8 "," m16 "," m32 C M64},\
+ { "movs", 2, m8 "," m16 "," m32 C M64},\
+ { "movsd", 1, "=xm,x"}, \
+ { "movsd", 2, "x,xm"}, \
+ { "movshdup", 1, "=x"}, \
+ { "movshdup", 2, "xm"}, \
+ { "movsldup", 1, "=x"}, \
+ { "movsldup", 2, "xm"}, \
+ { "movss", 1, "=xm,x"}, \
+ { "movss", 2, "x,xm"}, \
+ { "movsx", 1, "=" r16 "," r32},\
+ { "movsx", 2, rm8 "," rm8rm16},\
+ { "movupd", 1, "=x,xm"}, \
+ { "movupd", 2, "xm,x"}, \
+ { "movups", 1, "=x,xm"}, \
+ { "movups", 2, "xm,x"}, \
+ { "movzx", 1, "=" r16 "," r32},\
+ { "movzx", 2, rm8 "," rm8rm16},\
+ { "mul", 1, rm8rm16rm32}, \
+ { "mulpd", 1, "=x"}, \
+ { "mulpd", 2, "xm"}, \
+ { "mulps", 1, "=x"}, \
+ { "mulps", 2, "xm"}, \
+ { "mulsd", 1, "=x"}, \
+ { "mulsd", 2, "xm"}, \
+ { "mulss", 1, "=x"}, \
+ { "mulss", 2, "xm"}, \
+ { "neg", 1, "+" rm8rm16rm32}, \
+ { "not", 1, "+" rm8rm16rm32}, \
+ { "or", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\
+ { "or", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\
+ { "orpd", 1, "+x"}, \
+ { "orpd", 2, "xm"}, \
+ { "orps", 1, "+x"}, \
+ { "orps", 2, "xm"}, \
+ { "out", 1, "id"}, \
+ { "out", 2, a8 a16 a32}, \
+ { NX "outs", 1, "d"}, \
+ { NX "outs", 2, m8m16m32}, \
+ { "packssdw", 1, "+x,y"}, \
+ { "packssdw", 2, "xm,ym"}, \
+ { "packsswb", 1, "+x,y"}, \
+ { "packsswb", 2, "xm,ym"}, \
+ { "packuswb", 1, "+x,y"}, \
+ { "packuswb", 2, "xm,ym"}, \
+ { "paddb", 1, "+x,y"}, \
+ { "paddb", 2, "xm,ym"}, \
+ { "paddd", 1, "+x,y"}, \
+ { "paddd", 2, "xm,ym"}, \
+ { "paddq", 1, "+x,y"}, \
+ { "paddq", 2, "xm,ym"}, \
+ { "paddsb", 1, "+x,y"}, \
+ { "paddsb", 2, "xm,ym"}, \
+ { "paddsw", 1, "+x,y"}, \
+ { "paddsw", 2, "xm,ym"}, \
+ { "paddusb", 1, "+x,y"}, \
+ { "paddusb", 2, "xm,ym"}, \
+ { "paddusw", 1, "+x,y"}, \
+ { "paddusw", 2, "xm,ym"}, \
+ { "paddw", 1, "+x,y"}, \
+ { "paddw", 2, "xm,ym"}, \
+ { "pand", 1, "+x,y"}, \
+ { "pand", 2, "xm,ym"}, \
+ { "pandn", 1, "+x,y"}, \
+ { "pandn", 2, "xm,ym"}, \
+ { "pavgb", 1, "+x,y"}, \
+ { "pavgb", 2, "xm,ym"}, \
+ { "pavgw", 1, "+x,y"}, \
+ { "pavgw", 2, "xm,ym"}, \
+ { "pcmpeqb", 1, "+x,y"}, \
+ { "pcmpeqb", 2, "xm,ym"}, \
+ { "pcmpeqd", 1, "+x,y"}, \
+ { "pcmpeqd", 2, "xm,ym"}, \
+ { "pcmpeqw", 1, "+x,y"}, \
+ { "pcmpeqw", 2, "xm,ym"}, \
+ { "pcmpgtb", 1, "+x,y"}, \
+ { "pcmpgtb", 2, "xm,ym"}, \
+ { "pcmpgtd", 1, "+x,y"}, \
+ { "pcmpgtd", 2, "xm,ym"}, \
+ { "pcmpgtw", 1, "+x,y"}, \
+ { "pcmpgtw", 2, "xm,ym"}, \
+ { "pextrw", 1, "=" r32R64}, \
+ { "pextrw", 2, "xy"}, \
+ { "pextrw", 3, "i"}, \
+ { "pinsrw", 1, "=xy"}, \
+ { "pinsrw", 2, r32R64 "m"}, \
+ { "pinsrw", 3, "i"}, \
+ { "pmaddwd", 1, "+x,y"}, \
+ { "pmaddwd", 2, "xm,ym"}, \
+ { "pmaxsw", 1, "+x,y"}, \
+ { "pmaxsw", 2, "xm,ym"}, \
+ { "pmaxub", 1, "+x,y"}, \
+ { "pmaxub", 2, "xm,ym"}, \
+ { "pminsw", 1, "+x,y"}, \
+ { "pminsw", 2, "xm,ym"}, \
+ { "pminub", 1, "+x,y"}, \
+ { "pminub", 2, "xm,ym"}, \
+ { "pmovmskb", 1, "+" r32R64}, \
+ { "pmovmskb", 2, "xy"}, \
+ { "pmulhuw", 1, "+x,y"}, \
+ { "pmulhuw", 2, "xm,ym"}, \
+ { "pmulhw", 1, "+x,y"}, \
+ { "pmulhw", 2, "xm,ym"}, \
+ { "pmullw", 1, "+x,y"}, \
+ { "pmullw", 2, "xm,ym"}, \
+ { "pmuludq", 1, "+x,y"}, \
+ { "pmuludq", 2, "xm,ym"}, \
+ { "pop", 1, rm16 T(rm32) RM64},\
+ { "por", 1, "+x,y"}, \
+ { "por", 2, "xm,ym"}, \
+ { "prefetchnta", 1, "m"}, \
+ { "prefetcht0", 1, "m"}, \
+ { "prefetcht1", 1, "m"}, \
+ { "prefetcht2", 1, "m"}, \
+ { "psadbw", 1, "+x,y"}, \
+ { "psadbw", 2, "xm,ym"}, \
+ { "pshufd", 1, "=x"}, \
+ { "pshufd", 2, "xm"}, \
+ { "pshufd", 3, "i"}, \
+ { "pshufhw", 1, "=x"}, \
+ { "pshufhw", 2, "xm"}, \
+ { "pshufhw", 3, "i"}, \
+ { "pshuflw", 1, "=x"}, \
+ { "pshuflw", 2, "xm"}, \
+ { "pshuflw", 3, "i"}, \
+ { "pshufw", 1, "=y"}, \
+ { "pshufw", 2, "ym"}, \
+ { "pshufw", 3, "i"}, \
+ { "pslld", 1, "+x,y"}, \
+ { "pslld", 2, "xmi,ymi"}, \
+ { "pslldq", 1, "+x"}, \
+ { "pslldq", 2, "i"}, \
+ { "psllq", 1, "+x,y"}, \
+ { "psllq", 2, "xmi,ymi"}, \
+ { "psllw", 1, "+x,y"}, \
+ { "psllw", 2, "xmi,ymi"}, \
+ { "psrad", 1, "+x,y"}, \
+ { "psrad", 2, "xmi,ymi"}, \
+ { "psraw", 1, "+x,y"}, \
+ { "psraw", 2, "xmi,ymi"}, \
+ { "psrld", 1, "+x,y"}, \
+ { "psrld", 2, "xmi,ymi"}, \
+ { "psrldq", 1, "+x"}, \
+ { "psrldq", 2, "i"}, \
+ { "psrlq", 1, "+x,y"}, \
+ { "psrlq", 2, "xmi,ymi"}, \
+ { "psrlw", 1, "+x,y"}, \
+ { "psrlw", 2, "xmi,ymi"}, \
+ { "psubb", 1, "+x,y"}, \
+ { "psubb", 2, "xm,ym"}, \
+ { "psubd", 1, "+x,y"}, \
+ { "psubd", 2, "xm,ym"}, \
+ { "psubq", 1, "+x,y"}, \
+ { "psubq", 2, "xm,ym"}, \
+ { "psubsb", 1, "+x,y"}, \
+ { "psubsb", 2, "xm,ym"}, \
+ { "psubsw", 1, "+x,y"}, \
+ { "psubsw", 2, "xm,ym"}, \
+ { "psubusb", 1, "+x,y"}, \
+ { "psubusb", 2, "xm,ym"}, \
+ { "psubusw", 1, "+x,y"}, \
+ { "psubusw", 2, "xm,ym"}, \
+ { "psubw", 1, "+x,y"}, \
+ { "psubw", 2, "xm,ym"}, \
+ { "punpckhbw", 1, "+x,y"}, \
+ { "punpckhbw", 2, "xm,ym"}, \
+ { "punpckhdq", 1, "+x,y"}, \
+ { "punpckhdq", 2, "xm,ym"}, \
+ { "punpckhqdq", 1, "+x"}, \
+ { "punpckhqdq", 2, "xm"}, \
+ { "punpckhwd", 1, "+x,y"}, \
+ { "punpckhwd", 2, "xm,ym"}, \
+ { "punpcklbw", 1, "+x,y"}, \
+ { "punpcklbw", 2, "xm,ym"}, \
+ { "punpckldq", 1, "+x,y"}, \
+ { "punpckldq", 2, "xm,ym"}, \
+ { "punpcklqdq", 1, "+x"}, \
+ { "punpcklqdq", 2, "xm"}, \
+ { "punpcklwd", 1, "+x,y"}, \
+ { "punpcklwd", 2, "xm,ym"}, \
+ { "push", 1, rm16 T(rm32) RM64 "i"},\
+ { "pxor", 1, "+x,y"}, \
+ { "pxor", 2, "xm,ym"}, \
+ { "rcl", 1, "+" rm8rm16rm32}, \
+ { "rcl", 2, "ic"}, \
+ { "rcpps", 1, "+x"}, \
+ { "rcpps", 2, "xm"}, \
+ { "rcpss", 1, "+x"}, \
+ { "rcpss", 2, "xm"}, \
+ { "rcr", 1, "+" rm8rm16rm32}, \
+ { "rcr", 2, "ic"}, \
+ { "ret", 1, "i"}, \
+ { "rol", 1, "+" rm8rm16rm32}, \
+ { "rol", 2, "ic"}, \
+ { "ror", 1, "+" rm8rm16rm32}, \
+ { "ror", 2, "ic"}, \
+ { "rsqrtps", 1, "=x"}, \
+ { "rsqrtps", 2, "xm"}, \
+ { "rsqrtss", 1, "=x"}, \
+ { "rsqrtss", 2, "xm"}, \
+ { "sal", 1, "+" rm8rm16rm32}, \
+ { "sal", 2, "ic"}, \
+ { "sar", 1, "+" rm8rm16rm32}, \
+ { "sar", 2, "ic"}, \
+ { "sbb", 1, "+" rm8 "," rm16 "," rm32 "," r8 "," r16 "," r32},\
+ { "sbb", 2, ri8 "," ri16 "," ri32 "," m8 "," m16 "," m32},\
+ { "scas", 1, m8m16m32M64}, \
+ { "seta", 1, "=qm"}, \
+ { "setae", 1, "=qm"}, \
+ { "setb", 1, "=qm"}, \
+ { "setbe", 1, "=qm"}, \
+ { "setc", 1, "=qm"}, \
+ { "sete", 1, "=qm"}, \
+ { "setg", 1, "=qm"}, \
+ { "setge", 1, "=qm"}, \
+ { "setl", 1, "=qm"}, \
+ { "setle", 1, "=qm"}, \
+ { "setna", 1, "=qm"}, \
+ { "setnae", 1, "=qm"}, \
+ { "setnb", 1, "=qm"}, \
+ { "setnbe", 1, "=qm"}, \
+ { "setnc", 1, "=qm"}, \
+ { "setne", 1, "=qm"}, \
+ { "setng", 1, "=qm"}, \
+ { "setnge", 1, "=qm"}, \
+ { "setnl", 1, "=qm"}, \
+ { "setnle", 1, "=qm"}, \
+ { "setno", 1, "=qm"}, \
+ { "setnp", 1, "=qm"}, \
+ { "setns", 1, "=qm"}, \
+ { "setnz", 1, "=qm"}, \
+ { "seto", 1, "=qm"}, \
+ { "setp", 1, "=qm"}, \
+ { "setpe", 1, "=qm"}, \
+ { "setpo", 1, "=qm"}, \
+ { "sets", 1, "=qm"}, \
+ { "setz", 1, "=qm"}, \
+ { NY "sgdt", 1, "=m"}, \
+ { "shl", 1, "+" rm8rm16rm32}, \
+ { "shl", 2, "ic"}, \
+ { "shld", 1, "+" rm16 "," rm32 C RM64},\
+ { "shld", 2, r16 "," r32 C R64},\
+ { "shld", 3, "ic,ic" X(",ic")},\
+ { "shr", 1, "+" rm8rm16rm32}, \
+ { "shr", 2, "ic"}, \
+ { "shrd", 1, "+" rm16 "," rm32 C RM64},\
+ { "shrd", 2, r16 "," r32 C R64},\
+ { "shrd", 3, "ic,ic" X(",ic")}, \
+ { "shufpd", 1, "+x"}, \
+ { "shufpd", 2, "xm"}, \
+ { "shufpd", 3, "i"}, \
+ { "shufps", 1, "+x"}, \
+ { "shufps", 2, "xm"}, \
+ { "shufps", 3, "i"}, \
+ { NY "sidt", 1, "=m"}, \
+ { "sldt", 1, "=q" S("2") "m"},\
+ { "smsw", 1, "=q" S("2") "m"},\
+ { "sqrtpd", 1, "=x"}, \
+ { "sqrtpd", 2, "xm"}, \
+ { "sqrtps", 1, "=x"}, \
+ { "sqrtps", 2, "xm"}, \
+ { "sqrtsd", 1, "=x"}, \
+ { "sqrtsd", 2, "xm"}, \
+ { "sqrtss", 1, "=x"}, \
+ { "sqrtss", 2, "xm"}, \
+ { "stmxcsr", 1, "m"}, \
+ { "stos", 1, "=m"}, \
+ { "str", 1, "=q" S("2") "m"},\
+ { "sub", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\
+ { "sub", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\
+ { "subpd", 1, "+x"}, \
+ { "subpd", 2, "xm"}, \
+ { "subps", 1, "+x"}, \
+ { "subps", 2, "xm"}, \
+ { "subsd", 1, "+x"}, \
+ { "subsd", 2, "xm"}, \
+ { "subss", 1, "+x"}, \
+ { "subss", 2, "xm"}, \
+ { "test", 1, "+r," rm8rm16rm32},\
+ { "test", 2, "r,i"}, \
+ { "ucomisd", 1, "+x"}, \
+ { "ucomisd", 2, "xm"}, \
+ { "ucomiss", 1, "+x"}, \
+ { "ucomiss", 2, "xm"}, \
+ { "unpckhpd", 1, "+x"}, \
+ { "unpckhpd", 2, "xm"}, \
+ { "unpckhps", 1, "+x"}, \
+ { "unpckhps", 2, "xm"}, \
+ { "unpcklpd", 1, "+x"}, \
+ { "unpcklpd", 2, "xm"}, \
+ { "unpcklps", 1, "+x"}, \
+ { "unpcklps", 2, "xm"}, \
+ { "verr", 1, rm16}, \
+ { "verw", 1, rm16}, \
+ { "xadd", 1, "+" rm8 "," rm16 "," rm32},\
+ { "xadd", 2, r8 "," r16 "," r32},\
+ { "xchg", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\
+ { "xchg", 2, "+" r8 "," r16 "," r32 C R64 "," m8 "," m16 "," m32 C M64},\
+ { "xlat", 1, "m"}, \
+ { "xor", 1, "+" rm8 "," rm16 "," rm32 C RM64 "," r8 "," r16 "," r32 C R64},\
+ { "xor", 2, ri8 "," ri16 "," ri32 C RI64 "," m8 "," m16 "," m32 C M64},\
+ { "xorpd", 1, "+x"}, \
+ { "xorpd", 2, "xm"}, \
+ { "xorps", 1, "+x"}, \
+ { "xorps", 2, "xm"},
+
+#define TARGET_IASM_EXTRA_CLOBBERS \
+ { "rdtsc", { "edx", "eax"} }
+
+#define IASM_FUNCTION_MODIFIER "P"
+
+#define IASM_REGISTER_NAME(STR, BUF) i386_iasm_register_name (STR, BUF)
+
+/* APPLE LOCAL end CW asm blocks */
+
+/* Flag to mark data that is in the large address area. */
+#define SYMBOL_FLAG_FAR_ADDR (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_REF_FAR_ADDR_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_FAR_ADDR) != 0)
+/*
+Local variables:
+version-control: t
+End:
+*/
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.md b/gcc-4.2.1-5666.3/gcc/config/i386/i386.md
new file mode 100644
index 000000000..e825a0474
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.md
@@ -0,0 +1,21399 @@
+;; GCC machine description for IA-32 and x86-64.
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006
+;; Free Software Foundation, Inc.
+;; Mostly by William Schelter.
+;; x86_64 support added by Jan Hubicka
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA. */
+;;
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+;;
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+;;
+;; Macro REG_CLASS_FROM_LETTER in file i386.h defines the register
+;; constraint letters.
+;;
+;; The special asm out single letter directives following a '%' are:
+;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of
+;; operands[1].
+;; 'L' Print the opcode suffix for a 32-bit integer opcode.
+;; 'W' Print the opcode suffix for a 16-bit integer opcode.
+;; 'B' Print the opcode suffix for an 8-bit integer opcode.
+;; 'Q' Print the opcode suffix for a 64-bit float opcode.
+;; 'S' Print the opcode suffix for a 32-bit float opcode.
+;; 'T' Print the opcode suffix for an 80-bit extended real XFmode float opcode.
+;; 'J' Print the appropriate jump operand.
+;;
+;; 'b' Print the QImode name of the register for the indicated operand.
+;; %b0 would print %al if operands[0] is reg 0.
+;; 'w' Likewise, print the HImode name of the register.
+;; 'k' Likewise, print the SImode name of the register.
+;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh.
+;; 'y' Print "st(0)" instead of "st" as a register.
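+;;
+;; A worked illustration (assuming operands[0] is hard register 0, %eax):
+;; %b0, %w0, %k0 and %h0 print %al, %ax, %eax and %ah respectively, and a
+;; template such as "mov%z1\t{%1, %0|%0, %1}" emits movb, movw or movl
+;; according to whether operands[1] has QImode, HImode or SImode.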
+
+;; UNSPEC usage:
+
+(define_constants
+ [; Relocation specifiers
+ (UNSPEC_GOT 0)
+ (UNSPEC_GOTOFF 1)
+ (UNSPEC_GOTPCREL 2)
+ (UNSPEC_GOTTPOFF 3)
+ (UNSPEC_TPOFF 4)
+ (UNSPEC_NTPOFF 5)
+ (UNSPEC_DTPOFF 6)
+ (UNSPEC_GOTNTPOFF 7)
+ (UNSPEC_INDNTPOFF 8)
+
+ ; Prologue support
+ (UNSPEC_STACK_ALLOC 11)
+ (UNSPEC_SET_GOT 12)
+ (UNSPEC_SSE_PROLOGUE_SAVE 13)
+ (UNSPEC_REG_SAVE 14)
+ (UNSPEC_DEF_CFA 15)
+
+ ; TLS support
+ (UNSPEC_TP 16)
+ (UNSPEC_TLS_GD 17)
+ (UNSPEC_TLS_LD_BASE 18)
+ (UNSPEC_TLSDESC 19)
+
+ ; Other random patterns
+ (UNSPEC_SCAS 20)
+ (UNSPEC_FNSTSW 21)
+ (UNSPEC_SAHF 22)
+ (UNSPEC_FSTCW 23)
+ (UNSPEC_ADD_CARRY 24)
+ (UNSPEC_FLDCW 25)
+ (UNSPEC_REP 26)
+ (UNSPEC_EH_RETURN 27)
+ (UNSPEC_LD_MPIC 28) ; load_macho_picbase
+
+ ; For SSE/MMX support:
+ (UNSPEC_FIX_NOTRUNC 30)
+ (UNSPEC_MASKMOV 31)
+ (UNSPEC_MOVMSK 32)
+ (UNSPEC_MOVNT 33)
+ (UNSPEC_MOVU 34)
+ (UNSPEC_RCP 35)
+ (UNSPEC_RSQRT 36)
+ (UNSPEC_SFENCE 37)
+ (UNSPEC_NOP 38) ; prevents combiner cleverness
+ (UNSPEC_PFRCP 39)
+ (UNSPEC_PFRCPIT1 40)
+ (UNSPEC_PFRCPIT2 41)
+ (UNSPEC_PFRSQRT 42)
+ (UNSPEC_PFRSQIT1 43)
+ (UNSPEC_MFENCE 44)
+ (UNSPEC_LFENCE 45)
+ (UNSPEC_PSADBW 46)
+ (UNSPEC_LDQQU 47)
+ ; APPLE LOCAL begin 4121692
+ (UNSPEC_LDQ 201)
+ (UNSPEC_MOVQ 202)
+ (UNSPEC_STOQ 203)
+ ; APPLE LOCAL end 4121692
+
+ ; Generic math support
+ (UNSPEC_COPYSIGN 50)
+ (UNSPEC_IEEE_MIN 51) ; not commutative
+ (UNSPEC_IEEE_MAX 52) ; not commutative
+
+ ; x87 Floating point
+ (UNSPEC_SIN 60)
+ (UNSPEC_COS 61)
+ (UNSPEC_FPATAN 62)
+ (UNSPEC_FYL2X 63)
+ (UNSPEC_FYL2XP1 64)
+ (UNSPEC_FRNDINT 65)
+ (UNSPEC_FIST 66)
+ (UNSPEC_F2XM1 67)
+
+ ; x87 Rounding
+ (UNSPEC_FRNDINT_FLOOR 70)
+ (UNSPEC_FRNDINT_CEIL 71)
+ (UNSPEC_FRNDINT_TRUNC 72)
+ (UNSPEC_FRNDINT_MASK_PM 73)
+ (UNSPEC_FIST_FLOOR 74)
+ (UNSPEC_FIST_CEIL 75)
+ ; APPLE LOCAL 3399553
+ (UNSPEC_FLT_ROUNDS 76)
+
+ ; x87 Double output FP
+ (UNSPEC_SINCOS_COS 80)
+ (UNSPEC_SINCOS_SIN 81)
+ (UNSPEC_TAN_ONE 82)
+ (UNSPEC_TAN_TAN 83)
+ (UNSPEC_XTRACT_FRACT 84)
+ (UNSPEC_XTRACT_EXP 85)
+ (UNSPEC_FSCALE_FRACT 86)
+ (UNSPEC_FSCALE_EXP 87)
+ (UNSPEC_FPREM_F 88)
+ (UNSPEC_FPREM_U 89)
+ (UNSPEC_FPREM1_F 90)
+ (UNSPEC_FPREM1_U 91)
+
+ ; SSP patterns
+ (UNSPEC_SP_SET 100)
+ (UNSPEC_SP_TEST 101)
+ (UNSPEC_SP_TLS_SET 102)
+ (UNSPEC_SP_TLS_TEST 103)
+ ; APPLE LOCAL begin mainline
+ ; SSSE3
+ (UNSPEC_PSHUFB 220)
+ (UNSPEC_PSIGN 221)
+ (UNSPEC_PALIGNR 222)
+ ; APPLE LOCAL end mainline
+ ; APPLE LOCAL begin 5612787 mainline sse4
+ ; For SSE4A support
+ (UNSPEC_EXTRQI 130)
+ (UNSPEC_EXTRQ 131)
+ (UNSPEC_INSERTQI 132)
+ (UNSPEC_INSERTQ 133)
+
+ ; For SSE4.1 support
+ (UNSPEC_BLENDV 134)
+ (UNSPEC_INSERTPS 135)
+ (UNSPEC_DP 136)
+ (UNSPEC_MOVNTDQA 137)
+ (UNSPEC_MPSADBW 138)
+ (UNSPEC_PHMINPOSUW 139)
+ (UNSPEC_PTEST 140)
+ (UNSPEC_ROUND 141)
+
+ ; For SSE4.2 support
+ (UNSPEC_CRC32 143)
+ (UNSPEC_PCMPESTR 144)
+ (UNSPEC_PCMPISTR 145)
+ ; APPLE LOCAL end 5612787 mainline sse4
+ ])
+
+(define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ (UNSPECV_STACK_PROBE 1)
+ (UNSPECV_EMMS 2)
+ (UNSPECV_LDMXCSR 3)
+ (UNSPECV_STMXCSR 4)
+ (UNSPECV_FEMMS 5)
+ (UNSPECV_CLFLUSH 6)
+ (UNSPECV_ALIGN 7)
+ (UNSPECV_MONITOR 8)
+ (UNSPECV_MWAIT 9)
+ (UNSPECV_CMPXCHG_1 10)
+ (UNSPECV_CMPXCHG_2 11)
+ (UNSPECV_XCHG 12)
+ (UNSPECV_LOCK 13)
+ ])
+
+;; Registers by name.
+(define_constants
+ [(BP_REG 6)
+ (SP_REG 7)
+ (FLAGS_REG 17)
+ (FPSR_REG 18)
+ (DIRFLAG_REG 19)
+ ])
+
+;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
+;; from i386.c.
+
+;; In C guard expressions, put expressions which may be compile-time
+;; constants first. This allows for better optimization. For
+;; example, write "TARGET_64BIT && reload_completed", not
+;; "reload_completed && TARGET_64BIT".
+
+
+;; Processor type. This attribute must exactly match the processor_type
+;; enumeration in i386.h.
+; APPLE LOCAL mainline 2006-04-19 4434601
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
+ (const (symbol_ref "ix86_tune")))
+
+;; A basic instruction type. Refinements due to arguments to be
+;; provided in other attributes.
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_attr "type"
+ "other,multi,
+ alu,alu1,negnot,imov,imovx,lea,
+ incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+ icmp,test,ibr,setcc,icmov,
+ push,pop,call,callv,leave,
+ str,cld,
+ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
+ sselog,sselog1,sseiadd,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
+ mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
+ (const_string "other"))
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Main data type used by the insn
+(define_attr "mode"
+ "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF,V1DF"
+ (const_string "unknown"))
+
+;; The CPU unit the operation uses.
+(define_attr "unit" "integer,i387,sse,mmx,unknown"
+ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
+ (const_string "i387")
+ (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
+ (const_string "sse")
+ (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
+ (const_string "mmx")
+ (eq_attr "type" "other")
+ (const_string "unknown")]
+ (const_string "integer")))
+
+;; The (bounding maximum) length of an instruction immediate.
+(define_attr "length_immediate" ""
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
+ (const_int 0)
+ (eq_attr "unit" "i387,sse,mmx")
+ (const_int 0)
+ (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
+ imul,icmp,push,pop")
+ (symbol_ref "ix86_attr_length_immediate_default(insn,1)")
+ (eq_attr "type" "imov,test")
+ (symbol_ref "ix86_attr_length_immediate_default(insn,0)")
+ (eq_attr "type" "call")
+ (if_then_else (match_operand 0 "constant_call_address_operand" "")
+ (const_int 4)
+ (const_int 0))
+ (eq_attr "type" "callv")
+ (if_then_else (match_operand 1 "constant_call_address_operand" "")
+ (const_int 4)
+ (const_int 0))
+ ;; We don't know the size before shorten_branches. Expect
+ ;; the instruction to fit for better scheduling.
+ (eq_attr "type" "ibr")
+ (const_int 1)
+ ]
+ (symbol_ref "/* Update immediate_length and other attributes! */
+ gcc_unreachable (),1")))
+
+;; The (bounding maximum) length of an instruction address.
+(define_attr "length_address" ""
+ (cond [(eq_attr "type" "str,cld,other,multi,fxch")
+ (const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
+ ]
+ (symbol_ref "ix86_attr_length_address_default (insn)")))
+
+;; Set when the operand-size (data16) prefix is used.
+(define_attr "prefix_data16" ""
+ (if_then_else (ior (eq_attr "mode" "HI")
+ (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF")))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when string REP prefix is used.
+(define_attr "prefix_rep" ""
+ (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when 0f opcode prefix is used.
+(define_attr "prefix_0f" ""
+ (if_then_else
+ (ior (eq_attr "type" "imovx,setcc,icmov")
+ (eq_attr "unit" "sse,mmx"))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when REX opcode prefix is used.
+(define_attr "prefix_rex" ""
+ (cond [(and (eq_attr "mode" "DI")
+ (eq_attr "type" "!push,pop,call,callv,leave,ibr"))
+ (const_int 1)
+ (and (eq_attr "mode" "QI")
+ (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)")
+ (const_int 0)))
+ (const_int 1)
+ (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)")
+ (const_int 0))
+ (const_int 1)
+ ]
+ (const_int 0)))
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; There are also additional prefixes in SSSE3.
+(define_attr "prefix_extra" "" (const_int 0))
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Set when modrm byte is used.
+(define_attr "modrm" ""
+ (cond [(eq_attr "type" "str,cld,leave")
+ (const_int 0)
+ (eq_attr "unit" "i387")
+ (const_int 0)
+ (and (eq_attr "type" "incdec")
+ (ior (match_operand:SI 1 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "push")
+ (not (match_operand 1 "memory_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "pop")
+ (not (match_operand 0 "memory_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "imov")
+ (ior (and (match_operand 0 "register_operand" "")
+ (match_operand 1 "immediate_operand" ""))
+ (ior (and (match_operand 0 "ax_reg_operand" "")
+ (match_operand 1 "memory_displacement_only_operand" ""))
+ (and (match_operand 0 "memory_displacement_only_operand" "")
+ (match_operand 1 "ax_reg_operand" "")))))
+ (const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
+ ]
+ (const_int 1)))
+
+;; The (bounding maximum) length of an instruction in bytes.
+;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
+;; Later we may want to split them and compute proper length as for
+;; other insns.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "other,multi,fistp,frndint")
+ (const_int 16)
+ (eq_attr "type" "fcmp")
+ (const_int 4)
+ (eq_attr "unit" "i387")
+ (plus (const_int 2)
+ (plus (attr "prefix_data16")
+ (attr "length_address")))]
+ (plus (plus (attr "modrm")
+ (plus (attr "prefix_0f")
+ (plus (attr "prefix_rex")
+ (const_int 1))))
+ (plus (attr "prefix_rep")
+ (plus (attr "prefix_data16")
+ (plus (attr "length_immediate")
+ (attr "length_address")))))))
+
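+;; A hedged worked example of the bound above (illustrative, assuming the
+;; ix86_attr_length_* helpers return 0 when there is no immediate or
+;; memory operand): a register-to-register add{l} has modrm = 1 and all
+;; prefix, immediate and address attributes equal to 0, so the bound is
+;; 1 + 0 + 0 + 1 + 0 + 0 + 0 + 0 = 2 bytes, i.e. one opcode byte plus one
+;; modrm byte; immediates and displacements only add their own sizes.
+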
+;; The `memory' attribute is `none' if no memory is referenced, `load' or
+;; `store' if there is a simple memory reference therein, `both' if it
+;; both loads and stores, or `unknown' if the instruction is complex.
+
+(define_attr "memory" "none,load,store,both,unknown"
+ (cond [(eq_attr "type" "other,multi,str")
+ (const_string "unknown")
+ (eq_attr "type" "lea,fcmov,fpspc,cld")
+ (const_string "none")
+ (eq_attr "type" "fistp,leave")
+ (const_string "both")
+ (eq_attr "type" "frndint")
+ (const_string "load")
+ (eq_attr "type" "push")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "both")
+ (const_string "store"))
+ (eq_attr "type" "pop")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "both")
+ (const_string "load"))
+ (eq_attr "type" "setcc")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "store")
+ (const_string "none"))
+ (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
+ (if_then_else (ior (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "load")
+ (const_string "none"))
+ (eq_attr "type" "ibr")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "load")
+ (const_string "none"))
+ (eq_attr "type" "call")
+ (if_then_else (match_operand 0 "constant_call_address_operand" "")
+ (const_string "none")
+ (const_string "load"))
+ (eq_attr "type" "callv")
+ (if_then_else (match_operand 1 "constant_call_address_operand" "")
+ (const_string "none")
+ (const_string "load"))
+ (and (eq_attr "type" "alu1,negnot,ishift1,sselog1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "both")
+ (and (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "both")
+ (match_operand 0 "memory_operand" "")
+ (const_string "store")
+ (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (and (eq_attr "type"
+ "!alu1,negnot,ishift1,
+ imov,imovx,icmp,test,
+ fmov,fcmp,fsgn,
+ sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
+ mmx,mmxmov,mmxcmp,mmxcvt")
+ (match_operand 2 "memory_operand" ""))
+ (const_string "load")
+ (and (eq_attr "type" "icmov")
+ (match_operand 3 "memory_operand" ""))
+ (const_string "load")
+ ]
+ (const_string "none")))
+
+;; Indicates if an instruction has both an immediate and a displacement.
+
+(define_attr "imm_disp" "false,true,unknown"
+ (cond [(eq_attr "type" "other,multi")
+ (const_string "unknown")
+ (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
+ (and (match_operand 0 "memory_displacement_operand" "")
+ (match_operand 1 "immediate_operand" "")))
+ (const_string "true")
+ (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+ (and (match_operand 0 "memory_displacement_operand" "")
+ (match_operand 2 "immediate_operand" "")))
+ (const_string "true")
+ ]
+ (const_string "false")))
+
+;; Indicates if an FP operation has an integer source.
+
+(define_attr "fp_int_src" "false,true"
+ (const_string "false"))
+
+;; Defines rounding mode of an FP operation.
+
+(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+ (const_string "any"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "length" "128")
+ (set_attr "type" "multi")])
+
+;; All x87 floating point modes
+(define_mode_macro X87MODEF [SF DF XF])
+
+;; All integer modes handled by x87 fisttp operator.
+(define_mode_macro X87MODEI [HI SI DI])
+
+;; All integer modes handled by integer x87 operators.
+(define_mode_macro X87MODEI12 [HI SI])
+
+;; All SSE floating point modes
+(define_mode_macro SSEMODEF [SF DF])
+
+;; All integer modes handled by SSE cvtts?2si* operators.
+(define_mode_macro SSEMODEI24 [SI DI])
+
+
+;; Scheduling descriptions
+
+(include "pentium.md")
+(include "ppro.md")
+(include "k6.md")
+(include "athlon.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Compare instructions.
+
+;; All compare insns have expanders that save the operands away without
+;; actually generating RTL. The bCOND or sCOND (emitted immediately
+;; after the cmp) will actually emit the cmpM.
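+;;
+;; A hedged sketch of the flow (the RTL shown is illustrative rather than
+;; a verbatim dump): expanding a cmpsi followed by, say, a bgt merely
+;; records the two operands in ix86_compare_op0 and ix86_compare_op1;
+;; the branch or set-on-condition expander then reads them back and emits
+;; the real (set (reg FLAGS_REG) (compare ...)) together with the
+;; conditional jump or setcc.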
+
+(define_expand "cmpti"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "x86_64_general_operand" "")))]
+ "TARGET_64BIT"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[0] = force_reg (TImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpdi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "x86_64_general_operand" "")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[0] = force_reg (DImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpsi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SI 0 "cmpsi_operand" "")
+ (match_operand:SI 1 "general_operand" "")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[0] = force_reg (SImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmphi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" "")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[0] = force_reg (HImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpqi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[0] = force_reg (QImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_insn "cmpdi_ccno_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr")
+ (match_operand:DI 1 "const0_operand" "n,n")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{q}\t{%0, %0|%0, %0}
+ cmp{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*cmpdi_minus_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:DI 1 "x86_64_general_operand" "re,mr"))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "DI")])
+
+(define_expand "cmpdi_1_rex64"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" "")))]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "cmpdi_1_insn_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "mr,r")
+ (match_operand:DI 1 "x86_64_general_operand" "re,mr")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "DI")])
+
+
+(define_insn "*cmpsi_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr")
+ (match_operand:SI 1 "const0_operand" "n,n")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{l}\t{%0, %0|%0, %0}
+ cmp{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*cmpsi_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:SI 1 "general_operand" "ri,mr"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "SI")])
+
+(define_expand "cmpsi_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:SI 1 "general_operand" "ri,mr")))]
+ ""
+ "")
+
+(define_insn "*cmpsi_1_insn"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:SI 1 "general_operand" "ri,mr")))]
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "SI")])
+
+(define_insn "*cmphi_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr")
+ (match_operand:HI 1 "const0_operand" "n,n")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{w}\t{%0, %0|%0, %0}
+ cmp{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "HI")])
+
+(define_insn "*cmphi_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:HI 1 "general_operand" "ri,mr"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "HI")])
+
+(define_insn "*cmphi_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:HI 1 "general_operand" "ri,mr")))]
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "HI")])
+
+(define_insn "*cmpqi_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq")
+ (match_operand:QI 1 "const0_operand" "n,n")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{b}\t{%0, %0|%0, %0}
+ cmp{b}\t{$0, %0|%0, 0}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q")
+ (match_operand:QI 1 "general_operand" "qi,mq")))]
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q")
+ (match_operand:QI 1 "general_operand" "qi,mq"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "general_operand" "Qm")
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "register_operand" "Q")
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "const0_operand" "n")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t%h0, %h0"
+ [(set_attr "type" "test")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_expand "cmpqi_ext_3"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "cmpqi_ext_3_insn"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "general_operand" "Qmn")))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "cmpqi_ext_3_insn_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "nonmemory_operand" "Qn")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_4"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+;; These implement floating point compares.
+;; %%% See if we can get away with VOIDmode operands on the actual insns,
+;; which would allow mixing and matching FP modes on the compares.  That is
+;; what the old patterns did, but with many more of them.
+
+(define_expand "cmpxf"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:XF 0 "nonmemory_operand" "")
+ (match_operand:XF 1 "nonmemory_operand" "")))]
+ "TARGET_80387"
+{
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpdf"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "")
+ (match_operand:DF 1 "cmp_fp_expander_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpsf"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "")
+ (match_operand:SF 1 "cmp_fp_expander_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+{
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+;; FP compares, step 1:
+;; Set the FP condition codes.
+;;
+;; CCFPmode compare with exceptions
+;; CCFPUmode compare with no exceptions
+
+;; We may not use "#" to split and emit these, since the REG_DEAD notes
+;; used to manage the reg stack popping would not be preserved.
+
+(define_insn "*cmpfp_0"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" "X"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn "*cmpfp_sf"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "nonimmediate_operand" "fm"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*cmpfp_df"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "nonimmediate_operand" "fm"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "DF")])
+
+(define_insn "*cmpfp_xf"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:XF 1 "register_operand" "f")
+ (match_operand:XF 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cmpfp_u"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFPU
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "* return output_fp_compare (insn, operands, 0, 1);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn "*cmpfp_<mode>"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operator 3 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+;; FP compares, step 2
+;; Move the fpsw to ax.
+
+(define_insn "x86_fnstsw_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "fnstsw\t%0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "SI")
+ (set_attr "unit" "i387")])
+
+;; FP compares, step 3
+;; Get ax into flags, general case.
+
+(define_insn "x86_sahf_1"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))]
+ "!TARGET_64BIT"
+ "sahf"
+ [(set_attr "length" "1")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "mode" "SI")])
+
+;; Pentium Pro can do steps 1 through 3 in one go.
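+;;
+;; A hedged illustration of the difference (assembly shown for exposition,
+;; not emitted verbatim by these patterns):
+;;     fucomp              ; step 1: compare, set the FP condition codes
+;;     fnstsw  %ax         ; step 2: copy the FP status word into %ax
+;;     sahf                ; step 3: load AH into EFLAGS
+;; versus a single fcomi/fucomi on PPro and later, which compares and sets
+;; ZF, PF and CF directly.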
+
+(define_insn "*cmpfp_i_mixed"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "f,x")
+ (match_operand 1 "nonimmediate_operand" "f,xm")))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "fcmp,ssecomi")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*cmpfp_i_sse"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "x")
+ (match_operand 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "ssecomi")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*cmpfp_i_i387"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "f")
+ (match_operand 1 "register_operand" "f")))]
+ "TARGET_80387 && TARGET_CMOVE
+ && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "fcmp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*cmpfp_iu_mixed"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "f,x")
+ (match_operand 1 "nonimmediate_operand" "f,xm")))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "fcmp,ssecomi")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*cmpfp_iu_sse"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "x")
+ (match_operand 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "ssecomi")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*cmpfp_iu_387"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "f")
+ (match_operand 1 "register_operand" "f")))]
+ "TARGET_80387 && TARGET_CMOVE
+ && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "fcmp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
+ (set_attr "athlon_decode" "vector")])
+
+;; Move instructions.
+
+;; General case of fullword move.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (SImode, operands); DONE;")
+
+;; Push/pop instructions. They are separate since autoinc/dec is not a
+;; general_operand.
+;;
+;; %%% We don't use a post-inc memory reference because x86 is not a
+;; general AUTO_INC_DEC host, which impacts how it is treated in flow.
+;; Changing this impacts compiler performance on other non-AUTO_INC_DEC
+;; targets without our curiosities, and it is just as easy to represent
+;; this differently.
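+;;
+;; A hedged sketch of the representation used instead (general GCC
+;; convention rather than anything specific to this port): push_operand
+;; matches an explicit pre-decrement address, so a 32-bit push is written
+;; as
+;;   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) ...)
+;; rather than as an auto-increment/decrement form.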
+
+(define_insn "*pushsi2"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+ "!TARGET_64BIT"
+ "push{l}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushsi2_rex64"
+ [(set (match_operand:SI 0 "push_operand" "=X")
+ (match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+(define_insn "*pushsi2_prologue"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m"))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "push{l}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+(define_insn "*popsi1_epilogue"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+ (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "pop{l}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "SI")])
+
+(define_insn "popsi1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+ (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int 4)))]
+ "!TARGET_64BIT"
+ "pop{l}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movsi_xor"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "const0_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && (!TARGET_USE_MOV0 || optimize_size)"
+ "xor{l}\t{%0, %0|%0, %0}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movsi_or"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && operands[1] == constm1_rtx
+ && (TARGET_PENTIUM || optimize_size)"
+{
+ operands[1] = constm1_rtx;
+ return "or{l}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "1")])
+
+(define_insn "*movsi_1"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:SI 0 "nonimmediate_operand"
+ "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
+ (match_operand:SI 1 "general_operand"
+ "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SSELOG1:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t%0, %0";
+ return "xorps\t%0, %0";
+
+ case TYPE_SSEMOV:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_TI:
+ return "movdqa\t{%1, %0|%0, %1}";
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_SI:
+ return "movd\t{%1, %0|%0, %1}";
+ case MODE_SF:
+ return "movss\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ case TYPE_MMXADD:
+ return "pxor\t%0, %0";
+
+ case TYPE_MMXMOV:
+ if (get_attr_mode (insn) == MODE_DI)
+ return "movq\t{%1, %0|%0, %1}";
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_LEA:
+ return "lea{l}\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+ return "mov{l}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "mmxadd")
+ (eq_attr "alternative" "3,4,5")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "6")
+ (const_string "sselog1")
+ (eq_attr "alternative" "7,8,9,10,11")
+ (const_string "ssemov")
+ (match_operand:DI 1 "pic_32bit_operand" "")
+ (const_string "lea")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (const_string "DI")
+ (eq_attr "alternative" "6,7")
+ (if_then_else
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (and (eq_attr "alternative" "8,9,10,11")
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (const_string "SF")
+ ]
+ (const_string "SI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabssi_1_rex64"
+ [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:SI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{l}\t{%1, %P0|%P0, %1}
+ mov{l}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movabssi_2_rex64"
+ [(set (match_operand:SI 0 "register_operand" "=a,r")
+ (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{l}\t{%P1, %0|%0, %P1}
+ mov{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*swapsi"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ ""
+ "xchg{l}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (HImode, operands); DONE;")
+
+(define_insn "*pushhi2"
+ [(set (match_operand:HI 0 "push_operand" "=X")
+ (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
+ "!TARGET_64BIT"
+ "push{l}\t%k1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushhi2_rex64"
+ [(set (match_operand:HI 0 "push_operand" "=X")
+ (match_operand:HI 1 "nonmemory_no_elim_operand" "ri"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ /* movzwl is faster than movw on p2 due to partial word stalls,
+ though not as fast as an aligned movl. */
+ return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{w}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(ne (symbol_ref "optimize_size") (const_int 0))
+ (const_string "imov")
+ (and (eq_attr "alternative" "0")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand" ""))
+ (const_string "imov")
+ (and (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))
+ (eq_attr "alternative" "0,2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand" ""))
+ (const_string "SI")
+ (and (eq_attr "alternative" "0")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
+ (const_string "SI")
+ ]
+ (const_string "HI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabshi_1_rex64"
+ [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:HI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{w}\t{%1, %P0|%P0, %1}
+ mov{w}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*movabshi_2_rex64"
+ [(set (match_operand:HI 0 "register_operand" "=a,r")
+ (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{w}\t{%P1, %0|%0, %P1}
+ mov{w}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "HI")])
+
+(define_insn "*swaphi_1"
+ [(set (match_operand:HI 0 "register_operand" "+r")
+ (match_operand:HI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_size"
+ "xchg{l}\t%k1, %k0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*swaphi_2"
+ [(set (match_operand:HI 0 "register_operand" "+r")
+ (match_operand:HI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_PARTIAL_REG_STALL"
+ "xchg{w}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "HI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" ""))
+ (match_operand:HI 1 "general_operand" ""))]
+ "! TARGET_PARTIAL_REG_STALL || optimize_size"
+{
+ /* Don't generate memory->memory moves, go through a register */
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_insn "*movstricthi_1"
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r"))
+ (match_operand:HI 1 "general_operand" "rn,m"))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "mov{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "HI")])
+
+(define_insn "*movstricthi_xor"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
+ (match_operand:HI 1 "const0_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ((!TARGET_USE_MOV0 && !TARGET_PARTIAL_REG_STALL) || optimize_size)"
+ "xor{w}\t{%0, %0|%0, %0}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "HI")
+ (set_attr "length_immediate" "0")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (QImode, operands); DONE;")
+
+;; emit_push_insn when it calls move_by_pieces requires an insn to
+;; "push a byte". But actually we use pushl, which has the effect
+;; of rounding the amount pushed up to a word.
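+;; For example (illustrative), pushing the QImode value held in %al comes out
+;; as "pushl %eax" (%k1 selects the full 32-bit register), so a whole word is
+;; reserved on the stack.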
+
+(define_insn "*pushqi2"
+ [(set (match_operand:QI 0 "push_operand" "=X")
+ (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))]
+ "!TARGET_64BIT"
+ "push{l}\t%k1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushqi2_rex64"
+ [(set (match_operand:QI 0 "push_operand" "=X")
+ (match_operand:QI 1 "nonmemory_no_elim_operand" "qi"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+;; The situation is quite tricky when choosing a full-sized (SImode) move
+;; over a QImode move.  For a Q_REG -> Q_REG move we use the full size only
+;; on partial-register-dependency machines (such as the AMD Athlon), where a
+;; QImode move would introduce an extra dependency, and on partial-register-
+;; stall machines that don't use QImode patterns (where a QImode move would
+;; stall the next instruction).
+;;
+;; For loads of a Q_REG into a NONQ_REG we use full-sized moves, except on
+;; partial-register-stall machines, where such a move could cause a partial
+;; register stall; there we use movzx instead.
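+;;
+;; For example (illustrative): on the Athlon a %cl -> %dl copy is emitted as
+;; "movl %ecx, %edx" (alternative 0 in SImode) rather than "movb %cl, %dl",
+;; so the whole destination register is written.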
+(define_insn "*movqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
+ (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ gcc_assert (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM);
+ return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "5")
+ (not (match_operand:QI 1 "aligned_operand" "")))
+ (const_string "imovx")
+ (ne (symbol_ref "optimize_size") (const_int 0))
+ (const_string "imov")
+ (and (eq_attr "alternative" "3")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (eq_attr "alternative" "3,5")
+ (const_string "imovx")
+ (and (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))
+ (eq_attr "alternative" "2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,5")
+ (const_string "SI")
+ (eq_attr "alternative" "6")
+ (const_string "QI")
+ (eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "0,1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (and (eq (symbol_ref "optimize_size")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))))))
+ (const_string "SI")
+ ;; Avoid partial register stalls when not using QImode arithmetic
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "0,1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0)))))
+ (const_string "SI")
+ ]
+ (const_string "QI")))])
+
+(define_expand "reload_outqi"
+ [(parallel [(match_operand:QI 0 "" "=m")
+ (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "=&q")])]
+ ""
+{
+ rtx op0, op1, op2;
+ op0 = operands[0]; op1 = operands[1]; op2 = operands[2];
+
+ gcc_assert (!reg_overlap_mentioned_p (op2, op0));
+ if (! q_regs_operand (op1, QImode))
+ {
+ emit_insn (gen_movqi (op2, op1));
+ op1 = op2;
+ }
+ emit_insn (gen_movqi (op0, op1));
+ DONE;
+})
+
+(define_insn "*swapqi_1"
+ [(set (match_operand:QI 0 "register_operand" "+r")
+ (match_operand:QI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_size"
+ "xchg{l}\t%k1, %k0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_insn "*swapqi_2"
+ [(set (match_operand:QI 0 "register_operand" "+q")
+ (match_operand:QI 1 "register_operand" "+q"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_PARTIAL_REG_STALL"
+ "xchg{b}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (match_operand:QI 1 "general_operand" ""))]
+ "! TARGET_PARTIAL_REG_STALL || optimize_size"
+{
+ /* Don't generate memory->memory moves, go through a register. */
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_insn "*movstrictqi_1"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (match_operand:QI 1 "general_operand" "*qn,m"))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "mov{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+; APPLE LOCAL begin radar 4645709 5131847
+; This is based on movstrictqi_xor, which performs a partial register update.
+; If optimize_size is not set, it is better to update the whole register.
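+; For example (illustrative), clearing %al this way emits
+;	andl	$0xffffff00, %eax
+; (or the 64-bit form), writing the whole register instead of doing the
+; partial update that "xorb %al, %al" would.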
+(define_insn "*movstrictqi_and"
+ [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q"))
+ (match_operand:QI 1 "const0_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && (!TARGET_USE_MOV0 && !optimize_size)"
+{
+ if (TARGET_64BIT)
+ return "and{q}\t{$0xffffffffffffff00, %q0}";
+ else
+ return "and{l}\t{$0xffffff00, %k0}";
+}
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+; APPLE LOCAL end radar 4645709 5131847
+
+(define_insn "*movstrictqi_xor"
+ [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q"))
+ (match_operand:QI 1 "const0_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && (!TARGET_USE_MOV0 || optimize_size)"
+ "xor{b}\t{%0, %0|%0, %0}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movsi_extv_1"
+ [(set (match_operand:SI 0 "register_operand" "=R")
+ (sign_extract:SI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movs{bl|x}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movhi_extv_1"
+ [(set (match_operand:HI 0 "register_operand" "=R")
+ (sign_extract:HI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extv_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r")
+ (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*movqi_extv_1_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+ (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabsqi_1_rex64"
+ [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:QI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{b}\t{%1, %P0|%P0, %1}
+ mov{b}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movabsqi_2_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=a,r")
+ (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{b}\t{%P1, %0|%0, %P1}
+ mov{b}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movdi_extzv_1"
+ [(set (match_operand:DI 0 "register_operand" "=R")
+ (zero_extract:DI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ "TARGET_64BIT"
+ "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movsi_extzv_1"
+ [(set (match_operand:SI 0 "register_operand" "=R")
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movz{bl|x}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extzv_2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+ (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*movqi_extzv_2_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+ (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0)))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "movsi_insv_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SI 1 "general_operand" "Qmn"))]
+ "!TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "movdi_insv_1_rex64"
+ [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:DI 1 "nonmemory_operand" "Qn"))]
+ "TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movqi_insv_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q")
+ (const_int 8)))]
+ ""
+ "mov{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (DImode, operands); DONE;")
+
+(define_insn "*pushdi"
+ [(set (match_operand:DI 0 "push_operand" "=<")
+ (match_operand:DI 1 "general_no_elim_operand" "riF*m"))]
+ "!TARGET_64BIT"
+ "#")
+
+(define_insn "*pushdi2_rex64"
+ [(set (match_operand:DI 0 "push_operand" "=<,!<")
+ (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
+ "TARGET_64BIT"
+ "@
+ push{q}\t%1
+ #"
+ [(set_attr "type" "push,multi")
+ (set_attr "mode" "DI")])
+
+;; Convert impossible pushes of an immediate into existing instructions.
+;; First try to get a scratch register and go through it.  In case this
+;; fails, push the sign-extended lower part first and then overwrite the
+;; upper part with a 32-bit move.
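+;;
+;; For illustration (register choice is arbitrary): pushing the constant
+;; 0x100000002 with a scratch register available becomes
+;;	movabsq	$0x100000002, %rax
+;;	pushq	%rax
+;; and, when no scratch can be found,
+;;	pushq	$2		;; sign-extended low 32 bits
+;;	movl	$1, 4(%rsp)	;; overwrite the upper 32 bits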
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; We need to define this as both a peephole and a splitter for the case
+;; where the peephole2 pass is not run.
+;; The "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode) && 1"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "split_di (operands + 1, 1, operands + 2, operands + 3);
+ operands[1] = gen_lowpart (DImode, operands[2]);
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (4)));
+ ")
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? flow2_completed : reload_completed)
+ && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "split_di (operands + 1, 1, operands + 2, operands + 3);
+ operands[1] = gen_lowpart (DImode, operands[2]);
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (4)));
+ ")
+
+(define_insn "*pushdi2_prologue_rex64"
+ [(set (match_operand:DI 0 "push_operand" "=<")
+ (match_operand:DI 1 "general_no_elim_operand" "re*m"))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "push{q}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+(define_insn "*popdi1_epilogue_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
+ (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG)
+ (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "pop{q}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "DI")])
+
+(define_insn "popdi1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
+ (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG)
+ (plus:DI (reg:DI SP_REG) (const_int 8)))]
+ "TARGET_64BIT"
+ "pop{q}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movdi_xor_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "const0_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (!TARGET_USE_MOV0 || optimize_size)
+ && reload_completed"
+ "xor{l}\t{%k0, %k0|%k0, %k0}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movdi_or_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "const_int_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_PENTIUM || optimize_size)
+ && reload_completed
+ && operands[1] == constm1_rtx"
+{
+ operands[1] = constm1_rtx;
+ return "or{q}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "1")])
+
+(define_insn "*movdi_2"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r ,o ,*y,m*y,*y,*Yt,m ,*Yt,*Yt,*x,m ,*x,*x")
+ (match_operand:DI 1 "general_operand"
+ "riFo,riF,C ,*y ,m ,C ,*Yt,*Yt,m ,C ,*x,*x,m "))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ #
+ #
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movdqa\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; %%% This multiword shite has got to go.
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]))
+ && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movdi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r,r ,r,m ,!m,*y,*y,?rm,?*y,*x,*x,?rm,?*x,?*x,?*y")
+ (match_operand:DI 1 "general_operand"
+ "Z ,rem,i,re,n ,C ,*y,*y ,rm ,C ,*x,*x ,rm ,*y ,*x"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SSECVT:
+ if (which_alternative == 13)
+ return "movq2dq\t{%1, %0|%0, %1}";
+ else
+ return "movdq2q\t{%1, %0|%0, %1}";
+ case TYPE_SSEMOV:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "movdqa\t{%1, %0|%0, %1}";
+ /* FALLTHRU */
+ case TYPE_MMXMOV:
+      /* Moves from and into an integer register are done using the movd
+	 opcode with a REX prefix.  */
+ if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
+ return "movd\t{%1, %0|%0, %1}";
+ return "movq\t{%1, %0|%0, %1}";
+ case TYPE_SSELOG1:
+ case TYPE_MMXADD:
+ return "pxor\t%0, %0";
+ case TYPE_MULTI:
+ return "#";
+ case TYPE_LEA:
+ return "lea{q}\t{%a1, %0|%0, %a1}";
+ default:
+ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else if (which_alternative == 2)
+ return "movabs{q}\t{%1, %0|%0, %1}";
+ else
+ return "mov{q}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "5")
+ (const_string "mmxadd")
+ (eq_attr "alternative" "6,7,8")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "9")
+ (const_string "sselog1")
+ (eq_attr "alternative" "10,11,12")
+ (const_string "ssemov")
+ (eq_attr "alternative" "13,14")
+ (const_string "ssecvt")
+ (eq_attr "alternative" "4")
+ (const_string "multi")
+ (match_operand:DI 1 "pic_32bit_operand" "")
+ (const_string "lea")
+ ]
+ (const_string "imov")))
+ (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI")])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabsdi_1_rex64"
+ [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:DI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{q}\t{%1, %P0|%P0, %1}
+ mov{q}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movabsdi_2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a,r")
+ (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{q}\t{%P1, %0|%0, %P1}
+ mov{q}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "DI")])
+
+;; Convert impossible stores of an immediate into existing instructions.
+;; First try to get a scratch register and go through it.  In case this
+;; fails, move by 32-bit parts.
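+;;
+;; For illustration (addressing is arbitrary): storing 0x100000002 to (%rdi)
+;; with no scratch register splits into
+;;	movl	$2, (%rdi)
+;;	movl	$1, 4(%rdi)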
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; We need to define this as both a peephole and a splitter for the case
+;; where the peephole2 pass is not run.
+;; The "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode) && 1"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "split_di (operands, 2, operands + 2, operands + 4);")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? flow2_completed : reload_completed)
+ && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "split_di (operands, 2, operands + 2, operands + 4);")
+
+(define_insn "*swapdi_rex64"
+ [(set (match_operand:DI 0 "register_operand" "+r")
+ (match_operand:DI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_64BIT"
+ "xchg{q}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "DI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE || TARGET_64BIT"
+{
+ if (TARGET_64BIT)
+ ix86_expand_move (TImode, operands);
+ else
+ ix86_expand_vector_move (TImode, operands);
+ DONE;
+})
+
+(define_insn "*movti_internal"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE && !TARGET_64BIT
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 1:
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (ne (symbol_ref "optimize_size") (const_int 0)))
+ (const_string "V4SF")
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0)))
+ (const_string "V4SF")]
+ (const_string "TI")))])
+
+(define_insn "*movti_rex64"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+ (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 3:
+ case 4:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "reload_completed && !SSE_REG_P (operands[0])
+ && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (SFmode, operands); DONE;")
+
+(define_insn "*pushsf"
+ [(set (match_operand:SF 0 "push_operand" "=<,<,<")
+ (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))]
+ "!TARGET_64BIT"
+{
+ /* Anything else should be already split before reg-stack. */
+ gcc_assert (which_alternative == 1);
+ return "push{l}\t%1";
+}
+ [(set_attr "type" "multi,push,multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "SF,SI,SF")])
+
+(define_insn "*pushsf_rex64"
+ [(set (match_operand:SF 0 "push_operand" "=X,X,X")
+ (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
+ "TARGET_64BIT"
+{
+ /* Anything else should be already split before reg-stack. */
+ gcc_assert (which_alternative == 1);
+ return "push{q}\t%q1";
+}
+ [(set_attr "type" "multi,push,multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "SF,DI,SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[1]) == MEM
+ && constant_pool_reference_p (operands[1])"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[1] = avoid_constant_pool_reference (operands[1]);")
+
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:SF (reg:SI SP_REG)) (match_dup 1))])
+
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (set (mem:SF (reg:DI SP_REG)) (match_dup 1))])
+
+(define_insn "*movsf_1"
+ [(set (match_operand:SF 0 "nonimmediate_operand"
+ "=f,m ,f,r ,m ,x,x,x ,m ,!*y,!rm,!*y")
+ (match_operand:SF 1 "general_operand"
+ "fm,f,G ,rmF,Fr,C ,x ,xm,x,rm ,*y ,*y"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], SFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "mov{l}\t{%1, %0|%0, %1}";
+ case 5:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t%0, %0";
+ else
+ return "xorps\t%0, %0";
+ case 6:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movss\t{%1, %0|%0, %1}";
+ case 7:
+ case 8:
+ return "movss\t{%1, %0|%0, %1}";
+
+ case 9:
+ case 10:
+ return "movd\t{%1, %0|%0, %1}";
+
+ case 11:
+ return "movq\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,9,10")
+ (const_string "SI")
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APS move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ Do the same for architectures resolving dependencies on
+ the parts. While in DF mode it is better to always handle
+ just register parts, the SF mode is different due to lack
+ of instructions to load just part of the register. It is
+ better to maintain the whole registers in single format
+ to avoid problems on using packed logical operations. */
+ (eq_attr "alternative" "6")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "SF"))
+ (eq_attr "alternative" "11")
+ (const_string "DI")]
+ (const_string "SF")))])
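+
+;; As an illustration of the mode choice in *movsf_1 above: on targets with
+;; TARGET_SSE_PARTIAL_REG_DEPENDENCY or TARGET_SSE_SPLIT_REGS an xmm-to-xmm
+;; SFmode copy (alternative 6) comes out as "movaps %xmm1, %xmm0", writing
+;; the whole register to break the dependency chain; otherwise it stays
+;; "movss %xmm1, %xmm0".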
+
+(define_insn "*swapsf"
+ [(set (match_operand:SF 0 "fp_register_operand" "+f")
+ (match_operand:SF 1 "fp_register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "reload_completed || TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "SF")])
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (DFmode, operands); DONE;")
+
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2+2*memory operand size.
+;; On average, pushdf using integers can still be shorter.  Allow this
+;; pattern for optimize_size too.
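+;; (Working the formulas above: with an m-byte memory operand the i387
+;; sequence is 5+m bytes and the integer sequence 2+2*m bytes, so integers
+;; are no longer whenever m <= 3 and strictly shorter for m < 3.)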
+
+(define_insn "*pushdf_nointeger"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Yt"))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
+{
+ /* This insn should be already split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*,*")
+ (set_attr "mode" "DF,SI,SI,DF")])
+
+(define_insn "*pushdf_integer"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Yt"))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+{
+ /* This insn should be already split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "DF,SI,DF")])
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT && reload_completed"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (set (mem:DF (reg:SI SP_REG)) (match_dup 1))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT && reload_completed"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (set (mem:DF (reg:DI SP_REG)) (match_dup 1))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; Moving is usually shorter when only FP registers are used. This separate
+;; movdf pattern avoids the use of integer registers for FP operations
+;; when optimizing for size.
+
+(define_insn "*movdf_nointeger"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,*r ,o ,Yt*x,Yt*x,Yt*x ,m ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,*roF,F*r,C ,Yt*x,mYt*x,Yt*x"))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ return "movdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "movq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4")
+ (const_string "SI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_insn "*movdf_integer"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,r ,o ,Yt*x,Yt*x,Yt*x,m ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,roF,Fr,C ,Yt*x,m ,Yt*x"))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT)
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ return "movdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "movq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ default:
+      gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4")
+ (const_string "SI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_split
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ! (ANY_FP_REG_P (operands[0]) ||
+ (GET_CODE (operands[0]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+ && ! (ANY_FP_REG_P (operands[1]) ||
+ (GET_CODE (operands[1]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*swapdf"
+ [(set (match_operand:DF 0 "fp_register_operand" "+f")
+ (match_operand:DF 1 "fp_register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "reload_completed || TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "DF")])
+
+(define_expand "movxf"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (XFmode, operands); DONE;")
+
+;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushxf using integer instructions is 3+3*memory operand size.
+;; Pushing using integer instructions is longer except for constants
+;; and direct memory references.
+;; (assuming that any given constant is pushed only once, but this ought to be
+;; handled elsewhere).
+
+(define_insn "*pushxf_nointeger"
+ [(set (match_operand:XF 0 "push_operand" "=X,X,X")
+ (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
+ "optimize_size"
+{
+ /* This insn should be already split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "XF,SI,SI")])
+
+(define_insn "*pushxf_integer"
+ [(set (match_operand:XF 0 "push_operand" "=<,<")
+ (match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
+ "!optimize_size"
+{
+ /* This insn should be already split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*")
+ (set_attr "mode" "XF,SI")])
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "general_operand" ""))]
+ "reload_completed
+ && (GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == DFmode)
+ && !ANY_FP_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (match_operand:XF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:SI SP_REG)) (match_dup 1))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (match_operand:XF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:DI SP_REG)) (match_dup 1))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+;; Do not use integer registers when optimizing for size
+(define_insn "*movxf_nointeger"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
+ "optimize_size
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && (reload_in_progress || reload_completed
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], XFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ /* There is no non-popping store to memory for XFmode. So if
+ we need one, follow the store with a load. */
+ if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0\;fld%z0\t%y0";
+ else
+ return "fstp%z0\t%y0";
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3: case 4:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+ (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_insn "*movxf_integer"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
+ "!optimize_size
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && (reload_in_progress || reload_completed
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], XFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ /* There is no non-popping store to memory for XFmode. So if
+ we need one, follow the store with a load. */
+ if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0\;fld%z0\t%y0";
+ else
+ return "fstp%z0\t%y0";
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3: case 4:
+ return "#";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+ (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_split
+ [(set (match_operand 0 "nonimmediate_operand" "")
+ (match_operand 1 "general_operand" ""))]
+ "reload_completed
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && GET_MODE (operands[0]) == XFmode
+ && ! (ANY_FP_REG_P (operands[0]) ||
+ (GET_CODE (operands[0]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+ && ! (ANY_FP_REG_P (operands[1]) ||
+ (GET_CODE (operands[1]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "memory_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[1]) == MEM
+ && (GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode)
+ && constant_pool_reference_p (operands[1])"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx c = avoid_constant_pool_reference (operands[1]);
+ rtx r = operands[0];
+
+ if (GET_CODE (r) == SUBREG)
+ r = SUBREG_REG (r);
+
+ if (SSE_REG_P (r))
+ {
+ if (!standard_sse_constant_p (c))
+ FAIL;
+ }
+ else if (FP_REG_P (r))
+ {
+ if (!standard_80387_constant_p (c))
+ FAIL;
+ }
+ else if (MMX_REG_P (r))
+ FAIL;
+
+ operands[1] = c;
+})
+
+(define_insn "swapxf"
+ [(set (match_operand:XF 0 "register_operand" "+f")
+ (match_operand:XF 1 "register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "XF")])
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" ""))]
+ "TARGET_64BIT"
+{
+ ix86_expand_move (TFmode, operands);
+ DONE;
+})
+
+(define_insn "*movtf_internal"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm")
+ (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 3:
+ case 4:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
+
+(define_split
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ "reload_completed && !SSE_REG_P (operands[0])
+ && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; Zero extension instructions
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "zero_extendhisi2_and"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn "*zero_extendhisi2_movzwl"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
+ "movz{wl|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_expand "zero_extendqihi2"
+ [(parallel
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*zero_extendqihi2_and"
+ [(set (match_operand:HI 0 "register_operand" "=r,?&q")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "HI")])
+
+(define_insn "*zero_extendqihi2_movzbw_and"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
+ "#"
+ [(set_attr "type" "imovx,alu1")
+ (set_attr "mode" "HI")])
+
+; Zero-extend to SImode here to avoid partial register stalls.
+(define_insn "*zero_extendqihi2_movzbl"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed"
+ "movz{bl|x}\t{%1, %k0|%k0, %k1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; For the movzbw case strip only the clobber
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size)
+ && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))])
+
+;; When the source and destination do not overlap, clear the destination
+;; first and then do the movb.
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0) (const_int 0))
+ (set (strict_low_part (match_dup 2)) (match_dup 1))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;; Rest is handled by single and.
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "zero_extendqisi2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*zero_extendqisi2_and"
+ [(set (match_operand:SI 0 "register_operand" "=r,?&q")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_movzbw_and"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
+ "#"
+ [(set_attr "type" "imovx,alu1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_movzbw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed"
+ "movz{bl|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; For the movzbl case strip only the clobber
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size)
+ && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+ [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))])
+
+;; When the source and destination do not overlap, clear the destination
+;; first and then do the movb.
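+;; For example (illustrative), zero-extending %cl into %eax then becomes
+;;	movl	$0, %eax
+;;	movb	%cl, %al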
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)
+ && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0) (const_int 0))
+ (set (strict_low_part (match_dup 2)) (match_dup 1))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;; Rest is handled by single and.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; %%% Kill me once multi-word ops are sane.
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))]
+ ""
+ "if (!TARGET_64BIT)
+ {
+ emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1]));
+ DONE;
+ }
+ ")
+
+(define_insn "zero_extendsidi2_32"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,?*y,?*Yi")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "!TARGET_64BIT"
+ "@
+ #
+ #
+ #
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "SI,SI,SI,DI,TI")
+ (set_attr "type" "multi,multi,multi,mmxmov,ssemov")])
+
+(define_insn "zero_extendsidi2_rex64"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*y,?*Yi")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_64BIT"
+ "@
+ mov\t{%k1, %k0|%k0, %k1}
+ #
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx,imov,mmxmov,ssemov")
+ (set_attr "mode" "SI,DI,SI,SI")])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (zero_extend:DI (match_dup 0)))]
+ "TARGET_64BIT"
+ [(set (match_dup 4) (const_int 0))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(set (match_dup 4) (const_int 0))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed
+ && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (const_int 0))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_64BIT"
+ "movz{wl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_64BIT"
+ "movz{bl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+;; Sign extension instructions
+
+(define_expand "extendsidi2"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 ""))])]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_extendsidi2_rex64 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "*extendsidi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+ "!TARGET_64BIT"
+ "#")
+
+(define_insn "extendsidi2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=*a,r")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
+ "TARGET_64BIT"
+ "@
+ {cltq|cdqe}
+ movs{lq|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")
+ (set_attr "prefix_0f" "0")
+ (set_attr "modrm" "0,1")])
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_64BIT"
+ "movs{wq|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "TARGET_64BIT"
+ "movs{bq|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+;; Extend to memory case when source register does die.
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(reload_completed
+ && dead_or_set_p (insn, operands[1])
+ && !reg_mentioned_p (operands[1], operands[0]))"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 4) (match_dup 1))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+;; Extend to memory case when source register does not die.
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+ emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable. */
+ if (true_regnum (operands[1]) == 0
+ && true_regnum (operands[2]) == 1
+ && (optimize_size || TARGET_USE_CLTD))
+ {
+ emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31)));
+ }
+ else
+ {
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31)));
+ }
+ emit_move_insn (operands[4], operands[2]);
+ DONE;
+})
+
+;; Extend to register case. Optimize case where source and destination
+;; registers match and cases where we can use cltd.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+ if (true_regnum (operands[3]) != true_regnum (operands[1]))
+ emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable. */
+ if (true_regnum (operands[3]) == 0
+ && (optimize_size || TARGET_USE_CLTD))
+ {
+ emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31)));
+ DONE;
+ }
+
+ if (true_regnum (operands[4]) != true_regnum (operands[1]))
+ emit_move_insn (operands[4], operands[1]);
+
+ emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31)));
+ DONE;
+})
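+
+;; A small sketch of the case the cltd shortcut above targets (illustrative
+;; only, assuming a 32-bit target):
+;;   long long widen (int x) { return x; }
+;; must replicate the sign bit of the low word into the high word; when the
+;; low half is already in %eax and the high half lives in %edx, a single
+;; "cltd" does the job, otherwise a register copy plus "sarl $31" is used.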
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=*a,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
+ ""
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cwtl|cwde}";
+ default:
+ return "movs{wl|x}\t{%1,%0|%0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cwtl is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "*extendhisi2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=*a,r")
+ (zero_extend:DI
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cwtl|cwde}";
+ default:
+ return "movs{wl|x}\t{%1,%k0|%k0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cwtl is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=*a,r")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
+ ""
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cbtw|cbw}";
+ default:
+ return "movs{bw|x}\t{%1,%0|%0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "HI")
+ (set (attr "prefix_0f")
+    ;; movsx is short decodable while cbtw is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ ""
+ "movs{bl|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendqisi2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
+ "TARGET_64BIT"
+ "movs{bl|x}\t{%1,%k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; Conversions between float and double.
+
+;; These are all no-ops in the model used for the 80387. So just
+;; emit moves.
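+;; For instance (an illustrative sketch): with 387 math,
+;;   double up (float x) { return x; }
+;; needs no actual conversion, because every value in a 387 stack register
+;; is already held in extended precision; only a move (and, for the push
+;; patterns below, a stack adjustment) has to be emitted.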
+
+;; %%% Kill these when call knows how to work out a DFmode push earlier.
+(define_insn "*dummy_extendsfdf2"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "push_operand" "=<")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fYt")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "0"
+ "#")
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+ "!TARGET_64BIT"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (set (mem:DF (reg:SI SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+ "TARGET_64BIT"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (set (mem:DF (reg:DI SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_insn "*dummy_extendsfxf2"
+ [(set (match_operand:XF 0 "push_operand" "=<")
+ (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))]
+ "0"
+ "#")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
+ ""
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:SI SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
+ "TARGET_64BIT"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2)))
+ (set (mem:DF (reg:DI SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
+ ""
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2)))
+ (set (mem:DF (reg:SI SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
+ "TARGET_64BIT"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:DI SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (float_extend:DF (match_operand:SF 1 "general_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+ && standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+ }
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (SFmode, operands[1]);
+})
+
+(define_insn "*extendsfdf2_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+
+ case 2:
+ return "cvtss2sd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "mode" "SF,XF,DF")])
+
+(define_insn "*extendsfdf2_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "cvtss2sd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "*extendsfdf2_i387"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF,XF")])
+
+(define_expand "extendsfxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (float_extend:XF (match_operand:SF 1 "general_operand" "")))]
+ "TARGET_80387"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if (standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, XFmode, operands[1], SFmode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+ }
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (SFmode, operands[1]);
+})
+
+(define_insn "*extendsfxf2_i387"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ /* There is no non-popping store to memory for XFmode. So if
+ we need one, follow the store with a load. */
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fstp%z0\t%y0\n\tfld%z0\t%y0";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF,XF")])
+
+(define_expand "extenddfxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (float_extend:XF (match_operand:DF 1 "general_operand" "")))]
+ "TARGET_80387"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if (standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, XFmode, operands[1], DFmode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
+ }
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (DFmode, operands[1]);
+})
+
+(define_insn "*extenddfxf2_i387"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ /* There is no non-popping store to memory for XFmode. So if
+ we need one, follow the store with a load. */
+ if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0\n\tfld%z0\t%y0";
+ else
+ return "fstp%z0\t%y0";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "DF,XF")])
+
+;; %%% This seems like bad news.
+;; This cannot output into an f-reg because there is no way to be sure
+;; of truncating in that case. Otherwise this is just like a simple move
+;; insn. So we pretend we can output to a reg in order to get better
+;; register preferencing, but we really use a stack slot.
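+;; Concretely (an illustrative sketch): without unsafe math optimizations,
+;;   float narrow (double x) { return x; }
+;; compiled for 387 math has to round through a 32-bit memory slot (a store
+;; to the stack, reloaded afterwards if the result is wanted in a register),
+;; which is why the truncdfsf2 expander below allocates an SFmode stack
+;; temporary.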
+
+;; Conversion from DFmode to SFmode.
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[1] = force_reg (DFmode, operands[1]);
+
+ if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+ ;
+ else if (flag_unsafe_math_optimizations)
+ ;
+ else
+ {
+ rtx temp = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
+ emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+ DONE;
+ }
+})
+
+(define_expand "truncdfsf2_with_temp"
+ [(parallel [(set (match_operand:SF 0 "" "")
+ (float_truncate:SF (match_operand:DF 1 "" "")))
+ (clobber (match_operand:SF 2 "" ""))])]
+ "")
+
+(define_insn "*truncdfsf_fast_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,f,x")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,f,xm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ case 1:
+ return output_387_reg_move (insn, operands);
+ case 2:
+ return "cvtsd2ss\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
+(define_insn "*truncdfsf_fast_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "cvtsd2ss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_fast_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r,Yt")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,f ,Ytm")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m ,X"))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_MIX_SSE_I387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ case 1:
+ return "#";
+ case 2:
+ return "cvtsd2ss\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,multi,ssecvt")
+ (set_attr "unit" "*,i387,*")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m"))]
+ "TARGET_80387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ case 1:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,multi")
+ (set_attr "unit" "*,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf2_i387_1"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (float_truncate:SF
+ (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_80387
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && !TARGET_MIX_SSE_I387"
+{
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "fp_register_operand" "")))
+ (clobber (match_operand 2 "" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+{
+ operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));
+})
+
+;; Conversion from XFmode to SFmode.
+
+(define_expand "truncxfsf2"
+ [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_dup 2))])]
+ "TARGET_80387"
+{
+ if (flag_unsafe_math_optimizations)
+ {
+ rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode);
+ emit_insn (gen_truncxfsf2_i387_noop (reg, operands[1]));
+ if (reg != operands[0])
+ emit_move_insn (operands[0], reg);
+ DONE;
+ }
+ else
+ operands[2] = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
+})
+
+(define_insn "*truncxfsf2_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "f,f,f,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
+ "TARGET_MIX_SSE_I387"
+{
+ gcc_assert (!which_alternative);
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "truncxfsf2_i387_noop"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+{
+ return output_387_reg_move (insn, operands);
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncxfsf2_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "f,f,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
+ "TARGET_80387"
+{
+ gcc_assert (!which_alternative);
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov,multi,multi")
+ (set_attr "unit" "*,i387,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncxfsf2_i387_1"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387"
+{
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:SF 2 "memory_operand" ""))]
+ "TARGET_80387 && reload_completed"
+ [(set (match_dup 2) (float_truncate:SF (match_dup 1)))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:SF 0 "memory_operand" "")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:SF 2 "memory_operand" ""))]
+ "TARGET_80387"
+ [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
+ "")
+
+;; Conversion from XFmode to DFmode.
+
+(define_expand "truncxfdf2"
+ [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_dup 2))])]
+ "TARGET_80387"
+{
+ if (flag_unsafe_math_optimizations)
+ {
+ rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DFmode);
+ emit_insn (gen_truncxfdf2_i387_noop (reg, operands[1]));
+ if (reg != operands[0])
+ emit_move_insn (operands[0], reg);
+ DONE;
+ }
+ else
+ operands[2] = assign_386_stack_local (DFmode, SLOT_VIRTUAL);
+})
+
+(define_insn "*truncxfdf2_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Yt")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "f,f,f,f")))
+ (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+{
+ gcc_assert (!which_alternative);
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "DF")])
+
+(define_insn "truncxfdf2_i387_noop"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+{
+ return output_387_reg_move (insn, operands);
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "DF")])
+
+(define_insn "*truncxfdf2_i387"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "f,f,f")))
+ (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))]
+ "TARGET_80387"
+{
+ gcc_assert (!which_alternative);
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov,multi,multi")
+ (set_attr "unit" "*,i387,i387")
+ (set_attr "mode" "DF")])
+
+(define_insn "*truncxfdf2_i387_1"
+ [(set (match_operand:DF 0 "memory_operand" "=m")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387"
+{
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+}
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:DF 2 "memory_operand" ""))]
+ "TARGET_80387 && reload_completed"
+ [(set (match_dup 2) (float_truncate:DF (match_dup 1)))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:DF 2 "memory_operand" ""))]
+ "TARGET_80387"
+ [(set (match_dup 0) (float_truncate:DF (match_dup 1)))]
+ "")
+
+;; Signed conversion to DImode.
+
+(define_expand "fix_truncxfdi2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:XF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "fix_trunc<mode>di2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:SSEMODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
+{
+ if (TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
+ {
+ rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
+ emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ DONE;
+ }
+})
+
+;; Signed conversion to SImode.
+
+(define_expand "fix_truncxfsi2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:XF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "fix_trunc<mode>si2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (TARGET_FISTTP
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+ if (SSE_FLOAT_MODE_P (<MODE>mode))
+ {
+ rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+ emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ DONE;
+ }
+})
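+
+;; A short sketch of the two strategies handled here (illustrative only):
+;;   int trunc (double x) { return (int) x; }
+;; becomes a single "cvttsd2si" when SSE supports the source mode, while
+;; the x87-only path must either use "fisttp" (when TARGET_FISTTP) or
+;; temporarily switch the control word to round-toward-zero around an
+;; "fistp", as the patterns further below do.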
+
+;; Signed conversion to HImode.
+
+(define_expand "fix_trunc<mode>hi2"
+ [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (fix:HI (match_operand:X87MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; APPLE LOCAL begin 4176531
+;; Unsigned conversion to SImode.
+
+(define_expand "fixuns_trunc<mode>si2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "x")
+ (fix:SI (match_operand:SSEMODEF 1 "register_operand" "x")))]
+ "!TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_SSE2
+ && !optimize_size && (ix86_preferred_stack_boundary >= 128)"
+{
+ /* APPLE LOCAL 4424891 */
+ ix86_expand_convert_uns_<MODE>2SI_sse(operands); DONE;
+})
+
+;; Unsigned conversion to HImode.
+
+(define_insn "fixuns_truncdfhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r")
+ (fix:HI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "cvttsd2si\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,vector")])
+
+(define_insn "fixuns_truncsfhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r")
+ (fix:HI (match_operand:SF 1 "register_operand" "x,xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "cvttss2si\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "double,vector")])
+;; APPLE LOCAL end 4176531
+
+;; When SSE is available, it is always faster to use it!
+(define_insn "fix_truncsfdi_sse"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
+ "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "cvttss2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "double,vector")])
+
+(define_insn "fix_truncdfdi_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Yt,Ytm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "cvttsd2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,vector")])
+
+(define_insn "fix_truncsfsi_sse"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
+ "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "cvttss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,vector")])
+
+(define_insn "fix_truncdfsi_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Yt,Ytm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "cvttsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,vector")])
+
+;; Avoid vector decoded forms of the instruction.
+(define_peephole2
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(match_scratch:DF 2 "Yt")
+ (set (match_operand:SSEMODEI24 0 "register_operand" "")
+ (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(match_scratch:SF 2 "x")
+ (set (match_operand:SSEMODEI24 0 "register_operand" "")
+ (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+ "")
+
+(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))]
+ "TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
+ else
+ {
+ operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0],
+ operands[1],
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp"
+ [(set (match_operand:X87MODEI 0 "memory_operand" "=m")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f")))
+ (clobber (match_scratch:XF 2 "=&1f"))]
+ "TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)"
+ "* return output_fix_trunc (insn, operands, 1);"
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m"))
+ (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+ "TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)"
+ "#"
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI 0 "register_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1)))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI 0 "memory_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1)))
+ (clobber (match_dup 3))])]
+ "")
+
+;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
+;; of the machinery. Please note the clobber of FLAGS_REG: in the i387 control
+;; word calculation (inserted by LCM in the mode switching pass), FLAGS_REG
+;; clobbering insns can be used. See the emit_i387_cw_initialization ()
+;; function in i386.c.
+(define_insn_and_split "*fix_trunc<mode>_i387_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_80387 && !TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_truncdi_i387"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (fix:DI (match_operand 1 "register_operand" "f")))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_80387 && !TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fix_truncdi_i387_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (fix:DI (match_operand 1 "register_operand" "f,f")))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_80387 && !TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (fix:DI (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (fix:DI (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (fix:DI (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])]
+ "")
+
+(define_insn "fix_trunc<mode>_i387"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_80387 && !TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f")))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+ "TARGET_80387 && !TARGET_FISTTP
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ "")
+
+(define_insn "x86_fnstcw_1"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))]
+ "TARGET_80387"
+ "fnstcw\t%0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "HI")
+ (set_attr "unit" "i387")])
+
+(define_insn "x86_fldcw_1"
+ [(set (reg:HI FPSR_REG)
+ (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
+ "TARGET_80387"
+ "fldcw\t%0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "HI")
+ (set_attr "unit" "i387")
+ (set_attr "athlon_decode" "vector")])
+
+;; Conversion between fixed point and floating point.
+
+;; Even though we only accept memory inputs, the backend _really_
+;; wants to be able to do this between registers.
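+;; For example (an illustrative sketch):
+;;   double conv (int x) { return x; }
+;; can use "cvtsi2sd" straight from a general register with SSE2 math, but
+;; the i387 "fild" only takes a memory operand, so the "?r" alternatives in
+;; the patterns below emit "#" and are later split into a spill to the
+;; stack followed by the fild.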
+
+(define_expand "floathisf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+{
+ if (TARGET_SSE_MATH)
+ {
+ emit_insn (gen_floatsisf2 (operands[0],
+ convert_to_mode (SImode, operands[1], 0)));
+ DONE;
+ }
+})
+
+(define_insn "*floathisf2_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "SF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_expand "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "")
+
+(define_insn "*floatsisf2_mixed"
+ [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))]
+ "TARGET_MIX_SSE_I387"
+ "@
+ fild%z1\t%1
+ #
+ cvtsi2ss\t{%1, %0|%0, %1}
+ cvtsi2ss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "unit" "*,i387,*,*")
+ (set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsisf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x,x")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))]
+ "TARGET_SSE_MATH"
+ "cvtsi2ss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsisf2_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "SF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_expand "floatdisf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
+;; APPLE LOCAL begin 6382081
+{
+ if (!TARGET_64BIT)
+ {
+ rtx XFreg = gen_reg_rtx (XFmode);
+ rtx SFstack = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
+ emit_insn (gen_floatdixf2 (copy_rtx (XFreg), operands[1]));
+ emit_insn (gen_truncxfsf2 (copy_rtx (SFstack), XFreg));
+ emit_move_insn (operands[0], SFstack);
+ DONE;
+ }
+})
+;; APPLE LOCAL end 6382081
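+;; (In the 32-bit path above, the DImode value is loaded with fild into an
+;; XFmode stack register and only then rounded to SFmode through a stack
+;; slot; an illustrative C-level trigger would be
+;;   float conv (long long x) { return x; }
+;; and routing through XFmode means the 64-bit integer is rounded to float
+;; exactly once.)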
+
+(define_insn "*floatdisf2_mixed"
+ [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))]
+ "TARGET_64BIT && TARGET_MIX_SSE_I387"
+ "@
+ fild%z1\t%1
+ #
+ cvtsi2ss{q}\t{%1, %0|%0, %1}
+ cvtsi2ss{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "unit" "*,i387,*,*")
+ (set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatdisf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x,x")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))]
+ "TARGET_64BIT && TARGET_SSE_MATH"
+ "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatdisf2_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "SF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_expand "floathidf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float:DF (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ if (TARGET_SSE2 && TARGET_SSE_MATH)
+ {
+ emit_insn (gen_floatsidf2 (operands[0],
+ convert_to_mode (SImode, operands[1], 0)));
+ DONE;
+ }
+})
+
+(define_insn "*floathidf2_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387 && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "DF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_expand "floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "")
+
+(define_insn "*floatsidf2_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
+ (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+ "@
+ fild%z1\t%1
+ #
+ cvtsi2sd\t{%1, %0|%0, %1}
+ cvtsi2sd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "unit" "*,i387,*,*")
+ (set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsidf2_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=x,x")
+ (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "cvtsi2sd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsidf2_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "DF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_expand "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)"
+;; APPLE LOCAL begin 4424891
+{
+ if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
+ {
+ ix86_expand_convert_sign_DI2DF_sse (operands); DONE;
+ }
+})
+;; APPLE LOCAL end 4424891
+
+(define_insn "*floatdidf2_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
+ (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387"
+ "@
+ fild%z1\t%1
+ #
+ cvtsi2sd{q}\t{%1, %0|%0, %1}
+ cvtsi2sd{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "unit" "*,i387,*,*")
+ (set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatdidf2_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=x,x")
+ (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+ "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatdidf2_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "DF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "floathixf2"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "XF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "floatsixf2"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "XF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "floatdixf2"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "XF")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+;; %%% Kill these when reload knows how to do it.
+(define_split
+ [(set (match_operand 0 "fp_register_operand" "")
+ (float (match_operand 1 "register_operand" "")))]
+ "reload_completed
+ && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))"
+ [(const_int 0)]
+{
+ operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
+ operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
+ ix86_free_from_memory (GET_MODE (operands[1]));
+ DONE;
+})
+
+(define_expand "floatunssisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))]
+ "!TARGET_64BIT && TARGET_SSE_MATH"
+ "x86_emit_floatuns (operands); DONE;")
+
+;; APPLE LOCAL begin 4424891
+(define_expand "floatunssidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && (ix86_preferred_stack_boundary >= 128)"
+ "x86_emit_floatuns (operands); DONE;")
+;; APPLE LOCAL end 4424891
+
+(define_expand "floatunsdisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_64BIT && TARGET_SSE_MATH"
+ "x86_emit_floatuns (operands); DONE;")
+
+(define_expand "floatunsdidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ ;; APPLE LOCAL begin 4176531
+ "(TARGET_64BIT || !optimize_size) && TARGET_SSE2 && TARGET_SSE_MATH
+ && (ix86_preferred_stack_boundary >= 128)"
+ ;; APPLE LOCAL end 4176531
+ "x86_emit_floatuns (operands); DONE;")
+
+;; SSE extract/set expanders
+
+
+;; Add instructions
+
+;; %%% splits for addditi3
+
+(define_expand "addti3"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;")
+
+(define_insn "*addti3_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
+ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 4))
+ (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_ti (operands+0, 1, operands+0, operands+3);
+ split_ti (operands+1, 1, operands+1, operands+4);
+ split_ti (operands+2, 1, operands+2, operands+5);")
+
+;; %%% splits for addsidi3
+; [(set (match_operand:DI 0 "nonimmediate_operand" "")
+; (plus:DI (match_operand:DI 1 "general_operand" "")
+; (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))]
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;")
+
+(define_insn "*adddi3_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "general_operand" "roiF,riF")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 4))
+ (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands+0, 1, operands+0, operands+3);
+ split_di (operands+1, 1, operands+1, operands+4);
+ split_di (operands+2, 1, operands+2, operands+5);")
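+
+;; A short sketch of what this split produces (illustrative, 32-bit target):
+;;   unsigned long long add (unsigned long long a, unsigned long long b)
+;;   { return a + b; }
+;; becomes an "addl" of the low halves followed by an "adcl" of the high
+;; halves, i.e. exactly the add / add-with-carry pair generated above.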
+
+(define_insn "adddi3_carry_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "adc{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi3_cc_rex64"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "add{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "addqi3_carry"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:QI 2 "general_operand" "qi,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, QImode, operands)"
+ "adc{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "QI")])
+
+(define_insn "addhi3_carry"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
+ (match_operand:HI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:HI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, HImode, operands)"
+ "adc{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "HI")])
+
+(define_insn "addsi3_carry"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "adc{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_carry_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (match_operand:SI 2 "general_operand" "rim"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "adc{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_cc"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "add{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "addqi3_cc"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qi,qm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (plus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, QImode, operands)"
+ "add{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_expand "addsi3"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")
+
+(define_insn "*lea_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "no_seg_address_operand" "p"))]
+ "!TARGET_64BIT"
+ "lea{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_1_rex64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))]
+ "TARGET_64BIT"
+ "lea{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))]
+ "TARGET_64BIT"
+ "lea{l}\t{%a1, %k0|%k0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "no_seg_address_operand" "p"))]
+ "TARGET_64BIT"
+ "lea{q}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "DI")])
+
+;; The lea patterns for non-Pmode operations need to be matched by several
+;; insns, which are then converted to real lea instructions by splitters.
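+;; For example (illustrative only; the exact registers depend on allocation),
+;; a HImode computation such as r0 = r1 + r2 + 12 can be matched by
+;; *lea_general_1 and, after reload, split into an SImode lea:
+;;
+;;	leal	12(%ecx,%edx), %eax
+;;
+;; with only the low 16 bits of %eax subsequently meaningful.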
+
+(define_insn_and_split "*lea_general_1"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "register_operand" "r"))
+ (match_operand 3 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[2])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 3 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "nonmemory_operand" "ri")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "nonmemory_operand" "ri"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "register_operand" "r"))
+ (match_operand 4 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[3])"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+ pat = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1],
+ operands[2]),
+ operands[3]),
+ operands[4]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI (mult:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "register_operand" "r"))
+ (match_operand:SI 4 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))
+ (match_dup 4)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*adddi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{q}\t{%a2, %0|%0, %a2}";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+	      ; Current assemblers are broken and do not allow @GOTOFF in
+	      ; anything but a memory context.
+ (match_operand:DI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
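+
+;; Illustrative note on the add->sub rewrite used above: x86 sign-extends
+;; 8-bit immediates, so the imm8 range is -128..127.  "addq $128, %rax"
+;; therefore needs a 32-bit immediate, while the equivalent
+;; "subq $-128, %rax" fits in imm8; rewriting "addq $-4" as "subq $4" is
+;; purely cosmetic.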
+
+;; Convert add to the lea pattern to avoid the flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (plus:DI (match_dup 1)
+ (match_dup 2)))]
+ "")
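+
+;; Illustrative motivation for the split above: "addq %rsi, %rdi" clobbers
+;; the flags register, whereas "leaq (%rdi,%rsi), %rax" computes the same
+;; sum into a third register without touching the flags, which is why the
+;; split is done only when the destination differs from the first source.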
+
+(define_insn "*adddi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, DImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ???? We ought to handle the 32-bit case here too
+	 - do we need a new constraint?  */
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme"))
+ (match_operand:DI 1 "x86_64_general_operand" "%0")))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCZmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ???? We ought to handle the 32-bit case here too
+	 - do we need a new constraint?  */
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec, as done by peephole2; this pattern
+; is then matched.  We can't accept a general immediate, because in
+; case of overflow the result would be messed up.
+; This pattern also doesn't hold for 0x8000000000000000, since that value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion is
+; valid only for comparisons that do not depend on it.
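+; Illustrative example: when only equality is tested, "cmpq $-1, %rax" can
+; become "incq %rax" (x + 1 == 0 exactly when x == -1) and "cmpq $1, %rax"
+; can become "decq %rax".  0x8000000000000000 is excluded because negating
+; it overflows, and since the replacement computes x + (-c) rather than
+; x - c, the carry flag comes out inverted, so carry-based (unsigned)
+; comparisons cannot use this form.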
+(define_insn "*adddi_4_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+ (clobber (match_scratch:DI 0 "=rm"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGCmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128))
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+ return "sub{q}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi_5_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rme"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+
+(define_insn "*addsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:SI 2 "general_operand" "rmni,rni,lni")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{l}\t{%a2, %0|%0, %a2}";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+	      ; Current assemblers are broken and do not allow @GOTOFF in
+	      ; anything but a memory context.
+ (match_operand:SI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; Convert add to the lea pattern to avoid the flags dependency.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (plus (match_operand 1 "register_operand" "")
+ (match_operand 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(const_int 0)]
+{
+ rtx pat;
+  /* In -fPIC mode, constructs like (const (unspec [symbol_ref]))
+     may confuse gen_lowpart.  */
+ if (GET_MODE (operands[0]) != Pmode)
+ {
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ }
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
+
+;; It may seem that a nonimmediate operand is the proper one for operand 1.
+;; The addsi_1 pattern allows a nonimmediate operand at that place, and
+;; we take care in ix86_binary_operator_ok not to allow two memory
+;; operands, so proper swapping will be done in reload.  This allows
+;; patterns constructed from addsi_1 to match.
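+;; (Illustrative: an x86 ALU instruction can encode at most one memory
+;; operand ("addl (%ecx), (%edx)" does not exist), so
+;; ix86_binary_operator_ok rejects the two-memory case and reload copies
+;; one operand into a register.)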
+(define_insn "addsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SI 2 "general_operand" "rmni,lni"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{l}\t{%a2, %k0|%k0, %a2}";
+
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+	      ; Current assemblers are broken and do not allow @GOTOFF in
+	      ; anything but a memory context.
+ (match_operand:SI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; Convert add to the lea pattern to avoid the flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+})
+
+(define_insn "*addsi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "rmni,rni"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; See the comment above addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rmni"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; See the comment above addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec, as done by peephole2; this pattern
+; is then matched.  We can't accept a general immediate, because in
+; case of overflow the result would be messed up.
+; This pattern also doesn't hold for 0x80000000, since that value overflows
+; when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion is
+; valid only for comparisons that do not depend on it.
+(define_insn "*addsi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:SI 0 "=rm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128)))
+ return "sub{l}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rmni"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_expand "addhi3"
+ [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;")
+
+;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable "incb %ah"
+;; style optimizations enabled by define_split patterns.  This is not
+;; important for PII, and is in fact harmful because of partial register
+;; stalls.
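+;; (Illustrative: on ppro-class cores a write to %ah followed by a read of
+;; the full %eax forces a partial-register merge stall, so splitting a
+;; narrow increment into "incb %ah" tends to cost more than the bytes it
+;; saves.)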
+
+(define_insn "*addhi_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:HI 2 "general_operand" "ri,rm,lni")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "2")
+ (const_string "lea")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu"))))
+ (set_attr "mode" "HI,HI,SI")])
+
+(define_insn "*addhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*addhi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmni,rni"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (plus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*addhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+; See comments above addsi_4 for details.
+(define_insn "*addhi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:HI 0 "=rm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xffff) != 0x8000"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128)))
+ return "sub{w}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+
+(define_insn "*addhi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rmni"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_expand "addqi3"
+ [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*addqi_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ int widen = (which_alternative == 2);
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "3")
+ (const_string "lea")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu"))))
+ (set_attr "mode" "QI,QI,SI,SI")])
+
+(define_insn "*addqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ int widen = (which_alternative == 2);
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*addqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qnm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[1] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[1] == constm1_rtx);
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */
+ if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) < 0)
+ {
+ operands[1] = GEN_INT (-INTVAL (operands[1]));
+ return "sub{b}\t{%1, %0|%0, %1}";
+ }
+ return "add{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 1 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu1")))
+ (set (attr "memory")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (const_string "none")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qmni,qni"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (plus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_3"
+ [(set (reg FLAGS_REG)
+ (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+; See comments above addsi_4 for details.
+(define_insn "*addqi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:QI 0 "=qm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xff) != 0x80"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255))
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{b}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+
+(define_insn "*addqi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qmni"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+
+(define_insn "addqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" "Qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%h0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%h0";
+ }
+
+ default:
+ return "add{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "nonmemory_operand" "Qn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%h0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%h0";
+ }
+
+ default:
+ return "add{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "%0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "add{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "addxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (plus:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "adddf3"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (plus:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "")
+
+(define_expand "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (plus:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "")
+
+;; Subtract instructions
+
+;; %%% splits for subditi3
+
+(define_expand "subti3"
+ [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;")
+
+(define_insn "*subti3_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+ (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0")
+ (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (minus:DI (match_dup 4)
+ (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_ti (operands+0, 1, operands+0, operands+3);
+ split_ti (operands+1, 1, operands+1, operands+4);
+ split_ti (operands+2, 1, operands+2, operands+5);")
+
+;; %%% splits for subsidi3
+
+(define_expand "subdi3"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;")
+
+(define_insn "*subdi3_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "general_operand" "roiF,riF")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (minus:SI (match_dup 4)
+ (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands+0, 1, operands+0, operands+3);
+ split_di (operands+1, 1, operands+1, operands+4);
+ split_di (operands+2, 1, operands+2, operands+5);")
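+
+;; Illustrative result of the split above: a DImode subtraction on a 32-bit
+;; target becomes a sub/sbb pair, e.g.
+;;
+;;	subl	%ecx, %eax
+;;	sbbl	%edx, %ebx
+;;
+;; where the first instruction sets CF (the borrow) and the sbb consumes it
+;; in the high word (register choices are illustrative).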
+
+(define_insn "subdi3_carry_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sbb{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "subqi3_carry"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
+ (match_operand:QI 2 "general_operand" "qi,qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sbb{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "QI")])
+
+(define_insn "subhi3_carry"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
+ (match_operand:HI 2 "general_operand" "ri,rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sbb{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "HI")])
+
+(define_insn "subsi3_carry"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 2 "general_operand" "ri,rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sbb{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "subsi3_carry_zext"
+ [(set (match_operand:DI 0 "register_operand" "=rm,r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0")
+ (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 2 "general_operand" "ri,rm")))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sbb{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_expand "subsi3"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;")
+
+(define_insn "*subsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "rim"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "rim")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %1|%1, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_expand "subhi3"
+ [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;")
+
+(define_insn "*subhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*subhi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "ri,rm"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*subhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "ri,rm")))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "subqi3"
+ [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;")
+
+(define_insn "*subqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (minus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "sub{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qi,qm"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q")
+ (minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qi,qm")))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q")
+ (minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "subxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (minus:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "subdf3"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (minus:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "")
+
+(define_expand "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (minus:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "")
+
+;; Multiply instructions
+
+(define_expand "muldi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*muldi3_1_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:DI 2 "x86_64_general_operand" "K,e,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "@
+ imul{q}\t{%2, %1, %0|%0, %1, %2}
+ imul{q}\t{%2, %1, %0|%0, %1, %2}
+ imul{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "DI")])
+
+(define_expand "mulsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*mulsi3_1"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SI 2 "general_operand" "K,i,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM"
+ "@
+ imul{l}\t{%2, %1, %0|%0, %1, %2}
+ imul{l}\t{%2, %1, %0|%0, %1, %2}
+ imul{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*mulsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SI 2 "general_operand" "K,i,mr"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "@
+ imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ imul{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "SI")])
+
+(define_expand "mulhi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "")
+
+(define_insn "*mulhi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:HI 2 "general_operand" "K,i,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM"
+ "@
+ imul{w}\t{%2, %1, %0|%0, %1, %2}
+ imul{w}\t{%2, %1, %0|%0, %1, %2}
+ imul{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1,2")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "HI")])
+
+(define_expand "mulqi3"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "")
+
+(define_insn "*mulqi3_1"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "mul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "mode" "QI")])
+
+(define_expand "umulqihi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (zero_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" ""))
+ (zero_extend:HI
+ (match_operand:QI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "")
+
+(define_insn "*umulqihi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "mul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "mode" "QI")])
+
+(define_expand "mulqihi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))
+ (sign_extend:HI (match_operand:QI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "")
+
+(define_insn "*mulqihi3_insn"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "imul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "mode" "QI")])
+
+(define_expand "umulditi3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*umulditi3_insn"
+ [(set (match_operand:TI 0 "register_operand" "=A")
+ (mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "mul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "DI")])
+
+;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
+(define_expand "umulsidi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT"
+ "")
+
+(define_insn "*umulsidi3_insn"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "SI")])
+
+(define_expand "mulditi3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (sign_extend:TI
+ (match_operand:DI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*mulditi3_insn"
+ [(set (match_operand:TI 0 "register_operand" "=A")
+ (mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "imul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "DI")])
+
+(define_expand "mulsidi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT"
+ "")
+
+(define_insn "*mulsidi3_insn"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "imul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "SI")])
+
+(define_expand "umuldi3_highpart"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*umuldi3_highpart_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" "%a"))
+ (zero_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "mul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "DI")])
+
+(define_expand "umulsi3_highpart"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*umulsi3_highpart_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM"
+ "mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*umulsi3_highpart_zext"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32)))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "SI")])
+
+(define_expand "smuldi3_highpart"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (sign_extend:TI
+ (match_operand:DI 2 "register_operand" "")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*smuldi3_highpart_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" "%a"))
+ (sign_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "imul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "DI")])
+
+(define_expand "smulsi3_highpart"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*smulsi3_highpart_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM"
+ "imul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*smulsi3_highpart_zext"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32)))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "imul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "mode" "SI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "mulxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "muldf3"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (mult:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "")
+
+(define_expand "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (mult:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "")
+
+;; Divide instructions
+
+(define_insn "divqi3"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (div:QI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "idiv{b}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "QI")])
+
+(define_insn "udivqi3"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (udiv:QI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "div{b}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "divxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (div:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (div:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "")
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (div:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "")
+
+;; Remainder instructions.
+
+(define_expand "divmoddi4"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "register_operand" "")
+ (mod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+;; Allow the parameter to arrive in either eax or edx to avoid extra moves.
+;; Penalize the eax case slightly because it results in worse scheduling
+;; of the code.
+(define_insn "*divmoddi4_nocltd_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=&a,?a")
+ (div:DI (match_operand:DI 2 "register_operand" "1,0")
+ (match_operand:DI 3 "nonimmediate_operand" "rm,rm")))
+ (set (match_operand:DI 1 "register_operand" "=&d,&d")
+ (mod:DI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && !optimize_size && !TARGET_USE_CLTD"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmoddi4_cltd_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (div:DI (match_operand:DI 2 "register_operand" "a")
+ (match_operand:DI 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 1 "register_operand" "=&d")
+ (mod:DI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (optimize_size || TARGET_USE_CLTD)"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmoddi_noext_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (div:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 3 "register_operand" "=d")
+ (mod:DI (match_dup 1) (match_dup 2)))
+ (use (match_operand:DI 4 "register_operand" "3"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "idiv{q}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "register_operand" "")
+ (mod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (ashiftrt:DI (match_dup 4) (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:DI (reg:DI 0) (match_dup 2)))
+ (set (match_dup 3)
+ (mod:DI (reg:DI 0) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* Avoid use of cltd in favor of a mov+shift. */
+ if (!TARGET_USE_CLTD && !optimize_size)
+ {
+ if (true_regnum (operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_move_insn (operands[3], operands[1]);
+ operands[4] = operands[3];
+ }
+ else
+ {
+ gcc_assert (!true_regnum (operands[1]));
+ operands[4] = operands[1];
+ }
+})
+
+
+(define_expand "divmodsi4"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+;; Allow the parameter to arrive in either eax or edx to avoid extra moves.
+;; Penalize the eax case slightly because it results in worse scheduling
+;; of the code.
+(define_insn "*divmodsi4_nocltd"
+ [(set (match_operand:SI 0 "register_operand" "=&a,?a")
+ (div:SI (match_operand:SI 2 "register_operand" "1,0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm,rm")))
+ (set (match_operand:SI 1 "register_operand" "=&d,&d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "!optimize_size && !TARGET_USE_CLTD"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmodsi4_cltd"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_operand:SI 2 "register_operand" "a")
+ (match_operand:SI 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 1 "register_operand" "=&d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "optimize_size || TARGET_USE_CLTD"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmodsi_noext"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (use (match_operand:SI 4 "register_operand" "3"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "idiv{l}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 3)
+ (ashiftrt:SI (match_dup 4) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:SI (reg:SI 0) (match_dup 2)))
+ (set (match_dup 3)
+ (mod:SI (reg:SI 0) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* Avoid use of cltd in favor of a mov+shift. */
+ if (!TARGET_USE_CLTD && !optimize_size)
+ {
+ if (true_regnum (operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_move_insn (operands[3], operands[1]);
+ operands[4] = operands[3];
+ }
+ else
+ {
+ gcc_assert (!true_regnum (operands[1]));
+ operands[4] = operands[1];
+ }
+})
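+;; For illustration (rough sketch of the two expansions of x / y, with the
+;; dividend assumed in eax and the divisor assumed in ecx): with -Os or
+;; TARGET_USE_CLTD the cltd variant emits
+;;   cltd                  ; sign-extend eax into edx
+;;   idivl  %ecx
+;; while the variant split above emits approximately
+;;   movl   %eax, %edx
+;;   sarl   $31, %edx
+;;   idivl  %ecx
+;; accepting a slightly longer sequence in exchange for better scheduling
+;; of the sign extension.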
+;; %%% Split me.
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (div:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:HI 3 "register_operand" "=&d")
+ (mod:HI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "cwtd\;idiv{w}\t%2"
+ [(set_attr "type" "multi")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "SI")])
+
+(define_insn "udivmoddi4"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (udiv:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 3 "register_operand" "=&d")
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "xor{q}\t%3, %3\;div{q}\t%2"
+ [(set_attr "type" "multi")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "DI")])
+
+(define_insn "*udivmoddi4_noext"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (udiv:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 3 "register_operand" "=d")
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "div{q}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (udiv:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "register_operand" "")
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 3) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn "udivmodsi4"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 3 "register_operand" "=&d")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "xor{l}\t%3, %3\;div{l}\t%2"
+ [(set_attr "type" "multi")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "SI")])
+
+(define_insn "*udivmodsi4_noext"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "div{l}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(set (match_dup 3) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "udivmodhi4"
+ [(set (match_dup 4) (const_int 0))
+ (parallel [(set (match_operand:HI 0 "register_operand" "")
+ (udiv:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "nonimmediate_operand" "")))
+ (set (match_operand:HI 3 "register_operand" "")
+ (umod:HI (match_dup 1) (match_dup 2)))
+ (use (match_dup 4))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "operands[4] = gen_reg_rtx (HImode);")
+
+(define_insn "*udivmodhi_noext"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (udiv:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:HI 3 "register_operand" "=d")
+ (umod:HI (match_dup 1) (match_dup 2)))
+ (use (match_operand:HI 4 "register_operand" "3"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "div{w}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "HI")])
+
+;; We cannot use div/idiv for double-width division, because they raise a
+;; "division by zero" fault on overflow, which is not what we expect from
+;; truncate.  Since true (non-truncating) double-width division is never
+;; generated, we cannot create this insn anyway.
+;
+;(define_insn ""
+; [(set (match_operand:SI 0 "register_operand" "=a")
+; (truncate:SI
+; (udiv:DI (match_operand:DI 1 "register_operand" "A")
+; (zero_extend:DI
+; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
+; (set (match_operand:SI 3 "register_operand" "=d")
+; (truncate:SI
+; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
+; (clobber (reg:CC FLAGS_REG))]
+; ""
+; "div{l}\t{%2, %0|%0, %2}"
+; [(set_attr "type" "idiv")])
+
+;;- Logical AND instructions
+
+;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
+;; Note that this excludes ah.
+
+(define_insn "*testdi_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm")
+ (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re"))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ test{l}\t{%k1, %k0|%k0, %k1}
+ test{l}\t{%k1, %k0|%k0, %k1}
+ test{q}\t{%1, %0|%0, %1}
+ test{q}\t{%1, %0|%0, %1}
+ test{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,0,1,1")
+ (set_attr "mode" "SI,SI,DI,DI,DI")
+ (set_attr "pent_pair" "uv,np,uv,np,uv")])
+
+(define_insn "testsi_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm")
+ (match_operand:SI 1 "general_operand" "in,in,rin"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "test{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testsi_ccno_1"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:SI (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "nonmemory_operand" ""))
+ (const_int 0)))]
+ ""
+ "")
+
+(define_insn "*testhi_1"
+ [(set (reg FLAGS_REG)
+ (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm")
+ (match_operand:HI 1 "general_operand" "n,n,rn"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "test{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "HI")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ccz_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "nonmemory_operand" ""))
+ (const_int 0)))]
+ ""
+ "")
+
+(define_insn "*testqi_1_maybe_si"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
+ (match_operand:QI 1 "general_operand" "n,n,qn,n"))
+ (const_int 0)))]
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ix86_match_ccmode (insn,
+ GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
+{
+ if (which_alternative == 3)
+ {
+ if (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) < 0)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ return "test{l}\t{%1, %k0|%k0, %1}";
+ }
+ return "test{b}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1,1")
+ (set_attr "mode" "QI,QI,QI,SI")
+ (set_attr "pent_pair" "uv,np,uv,np")])
+
+(define_insn "*testqi_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm")
+ (match_operand:QI 1 "general_operand" "n,n,qn"))
+ (const_int 0)))]
+ "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+ && ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "QI")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ext_ccno_0"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 1 "const_int_operand" ""))
+ (const_int 0)))]
+ ""
+ "")
+
+(define_insn "*testqi_ext_0"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")
+ (set_attr "length_immediate" "1")
+ (set_attr "pent_pair" "np")])
+
+(define_insn "*testqi_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 1 "general_operand" "Qm")))
+ (const_int 0)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 1 "register_operand" "Q")))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+;; Combine likes to form bit extractions for some tests. Humor it.
+(define_insn "*testqi_ext_3"
+ [(set (reg FLAGS_REG)
+ (compare (zero_extract:SI
+ (match_operand 0 "nonimmediate_operand" "rm")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[2]) >= 0
+ && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+ && (GET_MODE (operands[0]) == SImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == QImode)"
+ "#")
+
+(define_insn "*testqi_ext_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (zero_extract:DI
+ (match_operand 0 "nonimmediate_operand" "rm")
+ (match_operand:DI 1 "const_int_operand" "")
+ (match_operand:DI 2 "const_int_operand" ""))
+ (const_int 0)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[2]) >= 0
+ /* Ensure that the resulting mask is a zero- or sign-extended operand.  */
+ && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+ || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64
+ && INTVAL (operands[1]) > 32))
+ && (GET_MODE (operands[0]) == SImode
+ || GET_MODE (operands[0]) == DImode
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == QImode)"
+ "#")
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(zero_extract
+ (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_int 0)]))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
+{
+ rtx val = operands[2];
+ HOST_WIDE_INT len = INTVAL (operands[3]);
+ HOST_WIDE_INT pos = INTVAL (operands[4]);
+ HOST_WIDE_INT mask;
+ enum machine_mode mode, submode;
+
+ mode = GET_MODE (val);
+ if (GET_CODE (val) == MEM)
+ {
+ /* ??? Combine likes to put non-volatile mem extractions in QImode
+ no matter the size of the test. So find a mode that works. */
+ if (! MEM_VOLATILE_P (val))
+ {
+ mode = smallest_mode_for_size (pos + len, MODE_INT);
+ val = adjust_address (val, mode, 0);
+ }
+ }
+ else if (GET_CODE (val) == SUBREG
+ && (submode = GET_MODE (SUBREG_REG (val)),
+ GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
+ && pos + len <= GET_MODE_BITSIZE (submode))
+ {
+ /* Narrow a paradoxical subreg to prevent partial register stalls. */
+ mode = submode;
+ val = SUBREG_REG (val);
+ }
+ else if (mode == HImode && pos + len <= 8)
+ {
+ /* Small HImode tests can be converted to QImode. */
+ mode = QImode;
+ val = gen_lowpart (QImode, val);
+ }
+
+ if (len == HOST_BITS_PER_WIDE_INT)
+ mask = -1;
+ else
+ mask = ((HOST_WIDE_INT)1 << len) - 1;
+ mask <<= pos;
+
+ operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode));
+})
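+;; For illustration, one worked instance of the mask computation above:
+;; comparing (zero_extract:SI x (const_int 8) (const_int 8)) with zero
+;; gives len = 8, pos = 8, hence mask = ((1 << 8) - 1) << 8 = 0xff00, and
+;; the split leaves a compare of (and x 0xff00) against zero, which the
+;; following splitters can turn into a single "testb" on the high byte
+;; when x lives in a QImode-addressable register.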
+
+;; Convert HImode/SImode test instructions with an immediate operand to
+;; QImode ones.  The i386 provides no way to encode test with an 8-bit
+;; sign-extended immediate, so this is a relatively important trick; see
+;; the size comparison after the splits below.
+;; Do the conversion only post-reload to avoid restricting the register
+;; class to QI registers.
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand 2 "register_operand" "")
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "reload_completed
+ && QI_REG_P (operands[2])
+ && GET_MODE (operands[2]) != QImode
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[3]) & ~(255 << 8)))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[3]) & ~(127 << 8))))"
+ [(set (match_dup 0)
+ (match_op_dup 1
+ [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8))
+ (match_dup 3))
+ (const_int 0)]))]
+ "operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);")
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "reload_completed
+ && GET_MODE (operands[2]) != QImode
+ && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[3]) & ~255))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[3]) & ~127)))"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+ (const_int 0)]))]
+ "operands[2] = gen_lowpart (QImode, operands[2]);
+ operands[3] = gen_lowpart (QImode, operands[3]);")
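+;; For illustration (approximate encodings): unlike "and" or "cmp",
+;; "test" has no sign-extended 8-bit immediate form, so for example
+;;   testl  $0x00000040, %ebx     ; 6 bytes (0xf7 /0 with imm32)
+;; is narrowed by the splits above to
+;;   testb  $0x40, %bl            ; 3 bytes (0xf6 /0 with imm8)
+;; which produces the same ZF (and, for the non-negative masks CCNOmode
+;; allows, the same SF), so the flag users are unaffected.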
+
+
+;; %%% This used to optimize known byte-wide AND operations to memory,
+;; and sometimes to QImode registers.  If this is considered useful,
+;; it should be done with splitters.
+
+(define_expand "anddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (AND, DImode, operands); DONE;")
+
+(define_insn "*anddi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ {
+ enum machine_mode mode;
+
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+ if (INTVAL (operands[2]) == 0xff)
+ mode = QImode;
+ else
+ {
+ gcc_assert (INTVAL (operands[2]) == 0xffff);
+ mode = HImode;
+ }
+
+ operands[1] = gen_lowpart (mode, operands[1]);
+ if (mode == QImode)
+ return "movz{bq|x}\t{%1,%0|%0, %1}";
+ else
+ return "movz{wq|x}\t{%1,%0|%0, %1}";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (get_attr_mode (insn) == MODE_SI)
+ return "and{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "and{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,*,0")
+ (set_attr "mode" "SI,DI,DI,DI")])
+
+(define_insn "*anddi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
+ (and:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, DImode, operands)"
+ "@
+ and{l}\t{%k2, %k0|%k0, %k2}
+ and{q}\t{%2, %0|%0, %2}
+ and{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI,DI,DI")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (AND, SImode, operands); DONE;")
+
+(define_insn "*andsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
+ (match_operand:SI 2 "general_operand" "ri,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ {
+ enum machine_mode mode;
+
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+ if (INTVAL (operands[2]) == 0xff)
+ mode = QImode;
+ else
+ {
+ gcc_assert (INTVAL (operands[2]) == 0xffff);
+ mode = HImode;
+ }
+
+ operands[1] = gen_lowpart (mode, operands[1]);
+ if (mode == QImode)
+ return "movz{bl|x}\t{%1,%0|%0, %1}";
+ else
+ return "movz{wl|x}\t{%1,%0|%0, %1}";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "and{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,0")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_dup 0)
+ (const_int -65536)))
+ (clobber (reg:CC FLAGS_REG))]
+ "optimize_size || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)"
+ [(set (strict_low_part (match_dup 1)) (const_int 0))]
+ "operands[1] = gen_lowpart (HImode, operands[0]);")
+
+(define_split
+ [(set (match_operand 0 "ext_register_operand" "")
+ (and (match_dup 0)
+ (const_int -256)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+ [(set (strict_low_part (match_dup 1)) (const_int 0))]
+ "operands[1] = gen_lowpart (QImode, operands[0]);")
+
+(define_split
+ [(set (match_operand 0 "ext_register_operand" "")
+ (and (match_dup 0)
+ (const_int -65281)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+ [(parallel [(set (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);")
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*andsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*andsi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "rim,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (and:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*andsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_expand "andhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (AND, HImode, operands); DONE;")
+
+(define_insn "*andhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
+ (match_operand:HI 2 "general_operand" "ri,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+ gcc_assert (INTVAL (operands[2]) == 0xff);
+ return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ return "and{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,0")
+ (set_attr "mode" "HI,HI,SI")])
+
+(define_insn "*andhi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rim,ri"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (and:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, HImode, operands)"
+ "and{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "andqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (AND, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*andqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qi,qmi,ri")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, QImode, operands)"
+ "@
+ and{b}\t{%2, %0|%0, %2}
+ and{b}\t{%2, %0|%0, %2}
+ and{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qi,qmi")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "and{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_2_maybe_si"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qim,qi,i"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
+ (and:QI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (AND, QImode, operands)
+ && ix86_match_ccmode (insn,
+ GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
+{
+ if (which_alternative == 2)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
+ return "and{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "and{b}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qim,qi"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (and:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, QImode, operands)"
+ "and{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "nonimmediate_operand" "qmi,qi"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "and{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+;; ??? A bug in recog prevents it from recognizing a const_int as an
+;; operand to zero_extend in andqi_ext_1. It was checking explicitly
+;; for a QImode operand, which of course failed.
+
+(define_insn "andqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+;; Generated by the peephole that translates test into and.  This shows up
+;; often in FP comparisons.
+
+(define_insn "*andqi_ext_0_cc"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "%0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+;; Convert wide AND instructions with an immediate operand to shorter QImode
+;; equivalents when possible.
+;; Don't do the splitting with memory operands, since it introduces a risk
+;; of memory mismatch stalls.  We may want to do the splitting when optimizing
+;; for size, but that can (should?) be handled by generic code instead.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (and:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since AND can be encoded with a sign-extended 8-bit immediate, this is
+;; only profitable when bit 7 of the mask's low byte is not set; see the
+;; encoding comparison after the split below.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(~INTVAL (operands[2]) & ~255)
+ && !(INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Logical inclusive OR instructions
+
+;; %%% This used to optimize known byte-wide AND operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "iordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (ior:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (IOR, DImode, operands); DONE;")
+
+(define_insn "*iordi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rme")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_binary_operator_ok (IOR, DImode, operands)"
+ "or{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*iordi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+ (ior:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, DImode, operands)"
+ "or{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*iordi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, DImode, operands)"
+ "or{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (IOR, SImode, operands); DONE;")
+
+(define_insn "*iorsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rmi")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*iorsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=rm")
+ (zero_extend:DI
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_1_zext_imm"
+ [(set (match_operand:DI 0 "register_operand" "=rm")
+ (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_2"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "rim,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (ior:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? Special case for immediate operand is missing - it is tricky.
+(define_insn "*iorsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_2_zext_imm"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand 2 "x86_64_zext_immediate_operand" "Z"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "or{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_expand "iorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (ior:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (IOR, HImode, operands); DONE;")
+
+(define_insn "*iorhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
+ (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmi,ri")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (IOR, HImode, operands)"
+ "or{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*iorhi_2"
+ [(set (reg FLAGS_REG)
+ (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rim,ri"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (ior:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, HImode, operands)"
+ "or{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*iorhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "or{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "iorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ior:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (IOR, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*iorqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+ (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmi,qi,ri")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (IOR, QImode, operands)"
+ "@
+ or{b}\t{%2, %0|%0, %2}
+ or{b}\t{%2, %0|%0, %2}
+ or{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*iorqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
+ (ior:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qmi,qi")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "or{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_2"
+ [(set (reg FLAGS_REG)
+ (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qim,qi"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (ior:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, QImode, operands)"
+ "or{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "general_operand" "qim,qi"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (ior:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "or{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_3"
+ [(set (reg FLAGS_REG)
+ (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qim"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "or{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "iorqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "or{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ior (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (ior:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with a sign-extended 8-bit immediate, this is
+;; only profitable when bit 7 is set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ior (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (ior:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Logical XOR instructions
+
+;; %%% This used to optimize known byte-wide and operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "xordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (xor:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (XOR, DImode, operands); DONE;")
+
+(define_insn "*xordi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_binary_operator_ok (XOR, DImode, operands)"
+ "@
+ xor{q}\t{%2, %0|%0, %2}
+ xor{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI,DI")])
+
+(define_insn "*xordi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+ (xor:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, DImode, operands)"
+ "@
+ xor{q}\t{%2, %0|%0, %2}
+ xor{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI,DI")])
+
+(define_insn "*xordi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, DImode, operands)"
+ "xor{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (XOR, SImode, operands); DONE;")
+
+(define_insn "*xorsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; Add a special case for immediates.
+(define_insn "*xorsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_1_zext_imm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_2"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "rim,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (xor:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? Special case for immediate operand is missing - it is tricky.
+(define_insn "*xorsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_2_zext_imm"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand 2 "x86_64_zext_immediate_operand" "Z"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xor{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_expand "xorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (xor:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (XOR, HImode, operands); DONE;")
+
+(define_insn "*xorhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
+ (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmi,ri")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, HImode, operands)"
+ "xor{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*xorhi_2"
+ [(set (reg FLAGS_REG)
+ (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rim,ri"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (xor:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, HImode, operands)"
+ "xor{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*xorhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xor{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "xorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (XOR, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*xorqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmi,qi,ri")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, QImode, operands)"
+ "@
+ xor{b}\t{%2, %0|%0, %2}
+ xor{b}\t{%2, %0|%0, %2}
+ xor{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*xorqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (xor:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qi,qmi")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "xor{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "xorqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qim,qi"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (xor:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, QImode, operands)"
+ "xor{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "general_operand" "qim,qi"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (xor:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "xor{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qim"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xor{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" "qmn"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+ (match_dup 2)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "nonmemory_operand" "Qn"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+ (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_expand "xorqi_cc_ext_1"
+ [(parallel [
+ (set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" ""))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+ (match_dup 2)))])]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (xor (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (xor:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since XOR can be encoded with a sign-extended 8-bit immediate, this is
+;; only profitable when bit 7 is set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (xor (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (xor:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Negation instructions
+
+(define_expand "negti2"
+ [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "ix86_expand_unary_operator (NEG, TImode, operands); DONE;")
+
+(define_insn "*negti2_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=ro")
+ (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_unary_operator_ok (NEG, TImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:DI (match_dup 2)) (const_int 0)))
+ (set (match_dup 0) (neg:DI (match_dup 2)))])
+ (parallel
+ [(set (match_dup 1)
+ (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 1)
+ (neg:DI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_ti (operands+1, 1, operands+2, operands+3);
+ split_ti (operands+0, 1, operands+0, operands+1);")
+
+(define_expand "negdi2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_expand_unary_operator (NEG, DImode, operands); DONE;")
+
+(define_insn "*negdi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro")
+ (neg:DI (match_operand:DI 1 "general_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && ix86_unary_operator_ok (NEG, DImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (neg:DI (match_operand:DI 1 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed"
+ [(parallel
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:SI (match_dup 2)) (const_int 0)))
+ (set (match_dup 0) (neg:SI (match_dup 2)))])
+ (parallel
+ [(set (match_dup 1)
+ (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 1)
+ (neg:SI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands+1, 1, operands+2, operands+3);
+ split_di (operands+0, 1, operands+0, operands+1);")
+
+(define_insn "*negdi2_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+ "neg{q}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "DI")])
+
+;; The problem with neg is that it does not perform (compare x 0);
+;; it really performs (compare 0 x), which leaves the zero flag as
+;; the only useful item.
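+;;
+;; A hypothetical, standalone sketch (GNU C with inline asm, not part of
+;; GCC) showing the asymmetry: negating 5 yields -5 but sets CF (the flags
+;; reflect 0 - 5), while "cmpl $0, %reg" on a register holding -5 would
+;; clear CF.  Only ZF reliably answers "is the result zero?", hence the
+;; CCZmode compare patterns below.
+;;
+;;   #include <stdio.h>
+;;
+;;   static void neg_flags (int x, int *zf, int *cf)
+;;   {
+;;     unsigned char z, c;
+;;     __asm__ ("negl %0; setz %1; setc %2"
+;;              : "+r" (x), "=q" (z), "=q" (c) : : "cc");
+;;     *zf = z;
+;;     *cf = c;
+;;   }
+;;
+;;   int main (void)
+;;   {
+;;     int zf, cf;
+;;     neg_flags (5, &zf, &cf);
+;;     printf ("ZF=%d CF=%d\n", zf, cf);   /* prints ZF=0 CF=1 */
+;;     neg_flags (0, &zf, &cf);
+;;     printf ("ZF=%d CF=%d\n", zf, cf);   /* prints ZF=1 CF=0 */
+;;     return 0;
+;;   }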
+
+(define_insn "*negdi2_cmpz_rex64"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (neg:DI (match_dup 1)))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+ "neg{q}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "DI")])
+
+
+(define_expand "negsi2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_expand_unary_operator (NEG, SImode, operands); DONE;")
+
+(define_insn "*negsi2_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; Combine is quite creative about this pattern.
+(define_insn "*negsi2_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 32)))
+ (const_int 32)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
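+;; For illustration, a hypothetical standalone C sketch (not part of GCC) of
+;; why the (lshiftrt (neg (ashift x 32)) 32) form above is just the low word
+;; negated and zero-extended, which is all the "neg{l}" template computes:
+;;
+;;   #include <assert.h>
+;;   #include <stdint.h>
+;;
+;;   int main (void)
+;;   {
+;;     uint64_t x = 0x0123456789abcdefULL;
+;;     uint64_t rtl_form = (0 - (x << 32)) >> 32;
+;;     uint64_t zext_neg = (uint32_t) (0 - (uint32_t) x);
+;;     assert (rtl_form == zext_neg);
+;;     return 0;
+;;   }
+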
+;; The problem with neg is that it does not perform (compare x 0);
+;; it really performs (compare 0 x), which leaves the zero flag as
+;; the only useful item.
+
+(define_insn "*negsi2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (neg:SI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_insn "*negsi2_cmpz_zext"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (lshiftrt:DI
+ (neg:DI (ashift:DI
+ (match_operand:DI 1 "register_operand" "0")
+ (const_int 32)))
+ (const_int 32))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
+ (const_int 32)))
+ (const_int 32)))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_expand "neghi2"
+ [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_unary_operator (NEG, HImode, operands); DONE;")
+
+(define_insn "*neghi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, HImode, operands)"
+ "neg{w}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "HI")])
+
+(define_insn "*neghi2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (neg:HI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, HImode, operands)"
+ "neg{w}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "HI")])
+
+(define_expand "negqi2"
+ [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_unary_operator (NEG, QImode, operands); DONE;")
+
+(define_insn "*negqi2_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, QImode, operands)"
+ "neg{b}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI")])
+
+(define_insn "*negqi2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (neg:QI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, QImode, operands)"
+ "neg{b}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI")])
+
+;; Changing the sign of FP values can also be done with the integer unit.
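+;; For illustration, a hypothetical standalone C sketch (not part of GCC):
+;; in IEEE single precision the sign lives in bit 31, so negation is an
+;; integer XOR with 0x80000000 and abs is an integer AND with 0x7fffffff,
+;; the same masks the SFmode splitters further down generate.
+;;
+;;   #include <assert.h>
+;;   #include <stdint.h>
+;;   #include <string.h>
+;;
+;;   static float negf_via_int (float f)
+;;   {
+;;     uint32_t bits;
+;;     memcpy (&bits, &f, sizeof bits);   /* reinterpret the bits */
+;;     bits ^= 0x80000000u;               /* flip the sign bit */
+;;     memcpy (&f, &bits, sizeof f);
+;;     return f;
+;;   }
+;;
+;;   static float absf_via_int (float f)
+;;   {
+;;     uint32_t bits;
+;;     memcpy (&bits, &f, sizeof bits);
+;;     bits &= 0x7fffffffu;               /* clear the sign bit */
+;;     memcpy (&f, &bits, sizeof f);
+;;     return f;
+;;   }
+;;
+;;   int main (void)
+;;   {
+;;     assert (negf_via_int (1.5f) == -1.5f);
+;;     assert (absf_via_int (-2.25f) == 2.25f);
+;;     return 0;
+;;   }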
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_fp_absneg_operator (NEG, SFmode, operands); DONE;")
+
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;")
+
+(define_insn "*absnegsf2_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x ,x,f,rm")
+ (match_operator:SF 3 "absneg_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0,0 ")]))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0,X,X "))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE_MATH && TARGET_MIX_SSE_I387
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
+ "#")
+
+(define_insn "*absnegsf2_sse"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,rm")
+ (match_operator:SF 3 "absneg_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")]))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,X"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE_MATH
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
+ "#")
+
+(define_insn "*absnegsf2_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,rm")
+ (match_operator:SF 3 "absneg_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "0,0")]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_80387 && !TARGET_SSE_MATH
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
+ "#")
+
+(define_expand "copysignsf3"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "nonmemory_operand" "")
+ (match_operand:SF 2 "register_operand" "")]
+ "TARGET_SSE_MATH"
+{
+ ix86_expand_copysign (operands);
+ DONE;
+})
+
+(define_insn_and_split "copysignsf3_const"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF
+ [(match_operand:V4SF 1 "vector_move_operand" "xmC")
+ (match_operand:SF 2 "register_operand" "0")
+ (match_operand:V4SF 3 "nonimmediate_operand" "xm")]
+ UNSPEC_COPYSIGN))]
+ "TARGET_SSE_MATH"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_copysign_const (operands);
+ DONE;
+})
+
+(define_insn "copysignsf3_var"
+ [(set (match_operand:SF 0 "register_operand" "=x, x, x, x,x")
+ (unspec:SF
+ [(match_operand:SF 2 "register_operand" " x, 0, 0, x,x")
+ (match_operand:SF 3 "register_operand" " 1, 1, x, 1,x")
+ (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0")
+ (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:V4SF 1 "=x, x, x, x,x"))]
+ "TARGET_SSE_MATH"
+ "#")
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF
+ [(match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "register_operand" "")
+ (match_operand:V4SF 4 "" "")
+ (match_operand:V4SF 5 "" "")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:V4SF 1 ""))]
+ "TARGET_SSE_MATH && reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_copysign_var (operands);
+ DONE;
+})
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "ix86_expand_fp_absneg_operator (NEG, DFmode, operands); DONE;")
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;")
+
+(define_insn "*absnegdf2_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,f,rm")
+ (match_operator:DF 3 "absneg_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0,0")]))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X,X"))
+ (clobber (reg:CC FLAGS_REG))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
+ "#")
+
+(define_insn "*absnegdf2_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,rm")
+ (match_operator:DF 3 "absneg_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0 ")]))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X "))
+ (clobber (reg:CC FLAGS_REG))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
+ "#")
+
+(define_insn "*absnegdf2_i387"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm")
+ (match_operator:DF 3 "absneg_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "0,0")]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
+ "#")
+
+(define_expand "copysigndf3"
+ [(match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "nonmemory_operand" "")
+ (match_operand:DF 2 "register_operand" "")]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+{
+ ix86_expand_copysign (operands);
+ DONE;
+})
+
+(define_insn_and_split "copysigndf3_const"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF
+ [(match_operand:V2DF 1 "vector_move_operand" "xmC")
+ (match_operand:DF 2 "register_operand" "0")
+ (match_operand:V2DF 3 "nonimmediate_operand" "xm")]
+ UNSPEC_COPYSIGN))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_copysign_const (operands);
+ DONE;
+})
+
+(define_insn "copysigndf3_var"
+ [(set (match_operand:DF 0 "register_operand" "=x, x, x, x,x")
+ (unspec:DF
+ [(match_operand:DF 2 "register_operand" " x, 0, 0, x,x")
+ (match_operand:DF 3 "register_operand" " 1, 1, x, 1,x")
+ (match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0")
+ (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:V2DF 1 "=x, x, x, x,x"))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "#")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF
+ [(match_operand:DF 2 "register_operand" "")
+ (match_operand:DF 3 "register_operand" "")
+ (match_operand:V2DF 4 "" "")
+ (match_operand:V2DF 5 "" "")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:V2DF 1 ""))]
+ "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_copysign_var (operands);
+ DONE;
+})
+
+(define_expand "negxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387"
+ "ix86_expand_fp_absneg_operator (NEG, XFmode, operands); DONE;")
+
+(define_expand "absxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387"
+ "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;")
+
+(define_insn "*absnegxf2_i387"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm")
+ (match_operator:XF 3 "absneg_operator"
+ [(match_operand:XF 1 "nonimmediate_operand" "0,0")]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_80387
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)"
+ "#")
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_expand "negtf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (neg:TF (match_operand:TF 1 "nonimmediate_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_fp_absneg_operator (NEG, TFmode, operands); DONE;")
+
+(define_expand "abstf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (abs:TF (match_operand:TF 1 "nonimmediate_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_fp_absneg_operator (ABS, TFmode, operands); DONE;")
+
+(define_insn "*absnegtf2_sse"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operator:TF 3 "absneg_operator"
+ [(match_operand:TF 1 "nonimmediate_operand" "0, x,0")]))
+ (use (match_operand:TF 2 "nonimmediate_operand" "xm,0,X"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_unary_operator_ok (GET_CODE (operands[3]), TFmode, operands)"
+ "#")
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Splitters for fp abs and neg.
+
+(define_split
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operator 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "absneg_operator"
+ [(match_operand 1 "register_operand" "")]))
+ (use (match_operand 2 "nonimmediate_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum machine_mode vmode = GET_MODE (operands[2]);
+ rtx tmp;
+
+ operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0);
+ operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0);
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+ if (GET_CODE (operands[3]) == ABS)
+ tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
+ else
+ tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
+ operands[3] = tmp;
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand:V4SF 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ if (TARGET_64BIT)
+ {
+ tmp = gen_lowpart (DImode, operands[0]);
+ tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
+ operands[0] = tmp;
+
+ if (GET_CODE (operands[1]) == ABS)
+ tmp = const0_rtx;
+ else
+ tmp = gen_rtx_NOT (DImode, tmp);
+ }
+ else
+ {
+ operands[0] = gen_highpart (SImode, operands[0]);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ operands[0] = gen_rtx_REG (SImode,
+ true_regnum (operands[0])
+ + (TARGET_64BIT ? 1 : 2));
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = GEN_INT (0x7fff);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = GEN_INT (0x8000);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand 0 "memory_operand" "")
+ (match_operator 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ int size = mode == XFmode ? 10 : GET_MODE_SIZE (mode);
+ rtx tmp;
+
+ /* APPLE LOCAL begin radar 4117515 */
+ if (size == 4)
+ {
+ operands[0] = adjust_address (operands[0], SImode, 0);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ }
+ else
+ {
+ operands[0] = adjust_address (operands[0], QImode, size - 1);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7f, QImode);
+ tmp = gen_rtx_AND (QImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80, QImode);
+ tmp = gen_rtx_XOR (QImode, operands[0], tmp);
+ }
+ }
+ /* APPLE LOCAL end radar 4117515 */
+ operands[1] = tmp;
+})
+
+;; Conditionalize these after reload.  If they match before reload, we
+;; lose the clobber and the ability to use integer instructions.
+
+(define_insn "*negsf2_1"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
+ "fchs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "SF")])
+
+(define_insn "*negdf2_1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
+ "fchs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "DF")])
+
+(define_insn "*negxf2_1"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (neg:XF (match_operand:XF 1 "register_operand" "0")))]
+ "TARGET_80387"
+ "fchs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+(define_insn "*abssf2_1"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
+ "fabs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "SF")])
+
+(define_insn "*absdf2_1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (abs:DF (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
+ "fabs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "DF")])
+
+(define_insn "*absxf2_1"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (abs:XF (match_operand:XF 1 "register_operand" "0")))]
+ "TARGET_80387"
+ "fabs"
+ [(set_attr "type" "fsgn")
+   (set_attr "mode" "XF")])
+
+(define_insn "*negextendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+ "fchs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "DF")])
+
+(define_insn "*negextenddfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (neg:XF (float_extend:XF
+ (match_operand:DF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "fchs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+(define_insn "*negextendsfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (neg:XF (float_extend:XF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "fchs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+(define_insn "*absextendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (abs:DF (float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+ "fabs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "DF")])
+
+(define_insn "*absextenddfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (abs:XF (float_extend:XF
+ (match_operand:DF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "fabs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+(define_insn "*absextendsfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (abs:XF (float_extend:XF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "fabs"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+;; One's complement instructions
+
+(define_expand "one_cmpldi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (not:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_unary_operator (NOT, DImode, operands); DONE;")
+
+(define_insn "*one_cmpldi2_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, operands)"
+ "not{q}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "DI")])
+
+(define_insn "*one_cmpldi2_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (not:DI (match_dup 1)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, DImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:DI (match_operand:DI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:DI 1 "nonimmediate_operand" "")
+ (not:DI (match_dup 3)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2
+ [(xor:DI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:DI (match_dup 3) (const_int -1)))])]
+ "")
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+ "ix86_expand_unary_operator (NOT, SImode, operands); DONE;")
+
+(define_insn "*one_cmplsi2_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))]
+ "ix86_unary_operator_ok (NOT, SImode, operands)"
+ "not{l}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (not:SI (match_operand:SI 1 "register_operand" "0"))))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "not{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_insn "*one_cmplsi2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (not:SI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:SI (match_operand:SI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:SI 1 "nonimmediate_operand" "")
+ (not:SI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:SI (match_dup 3) (const_int -1)))])]
+ "")
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (not:SI (match_dup 1))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:SI (match_operand:SI 3 "register_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:DI 1 "register_operand" "")
+ (zero_extend:DI (not:SI (match_dup 3))))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]
+ "")
+
+(define_expand "one_cmplhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_unary_operator (NOT, HImode, operands); DONE;")
+
+(define_insn "*one_cmplhi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))]
+ "ix86_unary_operator_ok (NOT, HImode, operands)"
+ "not{w}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "HI")])
+
+(define_insn "*one_cmplhi2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (not:HI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, HImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "HI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:HI (match_operand:HI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:HI 1 "nonimmediate_operand" "")
+ (not:HI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:HI (match_dup 3) (const_int -1)))])]
+ "")
+
+;; %%% Potential partial reg stall on alternative 1. What to do?
+(define_expand "one_cmplqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_unary_operator (NOT, QImode, operands); DONE;")
+
+(define_insn "*one_cmplqi2_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ "ix86_unary_operator_ok (NOT, QImode, operands)"
+ "@
+ not{b}\t%0
+ not{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI,SI")])
+
+(define_insn "*one_cmplqi2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (not:QI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, QImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:QI (match_operand:QI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:QI 1 "nonimmediate_operand" "")
+ (not:QI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:QI (match_dup 3) (const_int -1)))])]
+ "")
+
+;; Arithmetic shift instructions
+
+;; DImode shifts are implemented using the i386 "shift double" opcode,
+;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
+;; is variable, then the count is in %cl and the "imm" operand is dropped
+;; from the assembler input.
+;;
+;; This instruction shifts the target reg/mem as usual, but instead of
+;; shifting in zeros, bits are shifted in from reg operand. If the insn
+;; is a left shift double, bits are taken from the high order bits of
+;; reg, else if the insn is a shift right double, bits are taken from the
+;; low order bits of reg. So if %eax is "1234" and %edx is "5678",
+;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
+;;
+;; Since sh[lr]d does not change the `reg' operand, that is done
+;; separately, making all shifts emit pairs of shift double and normal
+;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
+;; support a 63 bit shift, each shift where the count is in a reg expands
+;; to a pair of shifts, a branch, a shift by 32 and a label.
+;;
+;; If the shift count is a constant, we need never emit more than one
+;; shift pair, instead using moves and sign extension for counts greater
+;; than 31.
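+;;
+;; As a rough sketch only (not the exact code the splitters emit; the
+;; register choices are purely illustrative), a variable 64-bit left
+;; shift of %edx:%eax by %cl on a 32-bit target looks like:
+;;
+;;	shldl	%cl, %eax, %edx		# high word takes bits from the low word
+;;	sall	%cl, %eax		# shift the low word
+;;	testb	$32, %cl		# count >= 32?
+;;	je	1f
+;;	movl	%eax, %edx		# yes: the low word becomes the high word
+;;	xorl	%eax, %eax		#      and the low word is cleared
+;; 1:
+;;
+;; (the "shift by 32" adjustment is realized here as the move-and-clear
+;; pair, since hardware shift counts are taken modulo 32).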
+
+(define_expand "ashlti3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (ashift:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+{
+ if (! immediate_operand (operands[2], QImode))
+ {
+ emit_insn (gen_ashlti3_1 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ ix86_expand_binary_operator (ASHIFT, TImode, operands);
+ DONE;
+})
+
+/* APPLE LOCAL begin 6440204 */
+/* Moved here from sse.md so this pattern gets recognized before ashlti3_2. Ugh. */
+(define_insn "sse2_ashlti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (ashift:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_SSE2"
+{
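+  /* The RTL shift count is in bits, but pslldq takes a byte count,
+     so scale the operand down by 8 before printing it.  */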
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "pslldq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+/* APPLE LOCAL end 6440204 */
+
+(define_insn "ashlti3_1"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (ashift:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "register_operand" "c")))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*ashlti3_2"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (ashift:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "O")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "register_operand" "")))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, operands[3], TImode); DONE;")
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashift:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;")
+
+(define_insn "x86_64_shld"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m")
+ (ior:DI (ashift:DI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "J,c"))
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r")
+ (minus:QI (const_int 64) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "@
+ shld{q}\t{%2, %1, %0|%0, %1, %2}
+ shld{q}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "x86_64_shift_adj"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+ (const_int 64))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:DI 1 "register_operand" "")
+ (match_dup 0)))
+ (set (match_dup 1)
+ (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:DI 3 "register_operand" "r")
+ (match_dup 1)))]
+ "TARGET_64BIT"
+ "")
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
+
+(define_insn "*ashldi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cJ,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "add{q}\t{%0, %0|%0, %0}";
+
+ case TYPE_LEA:
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+ gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3);
+ operands[1] = gen_rtx_MULT (DImode, operands[1],
+ GEN_INT (1 << INTVAL (operands[2])));
+ return "lea{q}\t{%a1, %0|%0, %a1}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
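+;; For example (illustrative only): rather than "movq %rdi, %rax" followed
+;; by "salq $3, %rax", which clobbers the flags, the split below rewrites
+;; the shift as a multiply by 8, which the lea patterns emit as
+;; "leaq 0(,%rdi,8), %rax", leaving the flags untouched.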
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "index_register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (mult:DI (match_dup 1)
+ (match_dup 2)))]
+ "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);")
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
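+;; (For example, "sal{q}" with a %cl value of zero leaves the flags exactly
+;; as they were, so the compare half of the pattern would report stale
+;; flags.)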
+(define_insn "*ashldi3_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "e"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashift:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{q}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "e"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{q}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=&r,r")
+ (ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc,Jc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, operands[3], DImode); DONE;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? flow2_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
+
+(define_insn "x86_shld_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m")
+ (ior:SI (ashift:SI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "@
+ shld{l}\t{%2, %1, %0|%0, %1, %2}
+ shld{l}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "x86_shift_adj_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+ (const_int 32))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:SI 1 "register_operand" "")
+ (match_dup 0)))
+ (set (match_dup 1)
+ (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:SI 3 "register_operand" "r")
+ (match_dup 1)))]
+ "TARGET_CMOVE"
+ "")
+
+(define_expand "x86_shift_adj_2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
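+  /* Test bit 5 of the count: when it is set (count >= 32), copy
+     operands[1] into operands[0] and clear operands[1]; the conditional
+     jump emitted below skips that fixup when the bit is clear.  */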
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ ix86_expand_clear (operands[1]);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;")
+
+(define_insn "*ashlsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "add{l}\t{%0, %0|%0, %0}";
+
+ case TYPE_LEA:
+ return "#";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "index_register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4"
+ [(const_int 0)]
+{
+ rtx pat;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (GET_MODE_SIZE (mode) < 4)
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ if (mode != Pmode)
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+
+ pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
+
+;; Rare case of shifting RSP is handled by generating move and shift
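+;; (%esp/%rsp cannot serve as the index register of an address, so the
+;; lea-based split above does not apply here).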
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(const_int 0)]
+{
+ rtx pat, clob;
+ emit_move_insn (operands[0], operands[1]);
+ pat = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_ASHIFT (GET_MODE (operands[0]),
+ operands[0], operands[2]));
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob)));
+ DONE;
+})
+
+(define_insn "*ashlsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t{%k0, %k0|%k0, %k0}";
+
+ case TYPE_LEA:
+ return "#";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{l}\t%k0";
+ else
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (ashift (match_operand 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0) (zero_extend:DI
+ (subreg:SI (mult:SI (match_dup 1)
+ (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+})
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlsi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashift:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t{%k0, %k0|%k0, %k0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{l}\t%k0";
+ else
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_expand "ashlhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;")
+
+(define_insn "*ashlhi3_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI,SI")])
+
+(define_insn "*ashlhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlhi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashift:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashlhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+(define_expand "ashlqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+
+(define_insn "*ashlqi3_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+ return "add{l}\t{%k0, %k0|%k0, %k0}";
+ else
+ return "add{b}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ }
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t%0";
+ else
+ return "sal{b}\t%0";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI,SI,SI")])
+
+(define_insn "*ashlqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+ return "add{l}\t{%k0, %k0|%k0, %k0}";
+ else
+ return "add{b}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ }
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t%0";
+ else
+ return "sal{b}\t%0";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI,SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlqi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashift:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{b}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{b}\t%0";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashlqi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{b}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{b}\t%0";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI")])
+
+;; See comment above `ashldi3' about how this works.
+
+(define_expand "ashrti3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+{
+ if (! immediate_operand (operands[2], QImode))
+ {
+ emit_insn (gen_ashrti3_1 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ ix86_expand_binary_operator (ASHIFTRT, TImode, operands);
+ DONE;
+})
+
+(define_insn "ashrti3_1"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "register_operand" "c")))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*ashrti3_2"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "O")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "register_operand" "")))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, operands[3], TImode); DONE;")
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;")
+
+(define_insn "x86_64_shrd"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m")
+ (ior:DI (ashiftrt:DI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "J,c"))
+ (ashift:DI (match_operand:DI 1 "register_operand" "r,r")
+ (minus:QI (const_int 64) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "@
+ shrd{q}\t{%2, %1, %0|%0, %1, %2}
+ shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
+
+(define_insn "*ashrdi3_63_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:DI 2 "const_int_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && INTVAL (operands[2]) == 63
+ && (TARGET_USE_CLTD || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "@
+ {cqto|cqo}
+ sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrdi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "J,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "@
+ sar{q}\t{%2, %0|%0, %2}
+ sar{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrdi3_one_bit_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrdi3_one_bit_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrdi3_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "n"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "n"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, operands[3], DImode); DONE;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? flow2_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
+
+(define_insn "x86_shrd_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m")
+ (ior:SI (ashiftrt:SI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))
+ (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "@
+ shrd{l}\t{%2, %1, %0|%0, %1, %2}
+ shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "pent_pair" "np")
+ (set_attr "mode" "SI")])
+
+(define_expand "x86_shift_adj_3"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
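+  /* Test bit 5 of the count: when it is set (count >= 32), copy
+     operands[1] into operands[0] and replace operands[1] with its sign
+     bits (an arithmetic shift right by 31); the conditional jump skips
+     that fixup when the bit is clear.  */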
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31)));
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
+(define_insn "ashrsi3_31"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:SI 2 "const_int_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ {cltd|cdq}
+ sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_31_zext"
+ [(set (match_operand:DI 0 "register_operand" "=*d,r")
+ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+ (match_operand:SI 2 "const_int_operand" "i,i"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_CLTD || optimize_size)
+ && INTVAL (operands[2]) == 31
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ {cltd|cdq}
+ sar{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;")
+
+(define_insn "*ashrsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*ashrsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ sar{l}\t{%2, %0|%0, %2}
+ sar{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ sar{l}\t{%2, %k0|%k0, %2}
+ sar{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrsi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*ashrsi3_one_bit_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;")
+
+(define_insn "*ashrhi3_1_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "@
+ sar{w}\t{%2, %0|%0, %2}
+ sar{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrhi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrhi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrhi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashrhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_expand "ashrqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;")
+
+(define_insn "*ashrqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "@
+ sar{b}\t{%2, %0|%0, %2}
+ sar{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ sar{b}\t{%1, %0|%0, %1}
+ sar{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "ishift1")
+ (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrqi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashiftrt:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrqi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrqi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashiftrt:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+
+;; Logical shift instructions
+
+;; See comment above `ashldi3' about how this works.
+
+(define_expand "lshrti3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+{
+ if (! immediate_operand (operands[2], QImode))
+ {
+ emit_insn (gen_lshrti3_1 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ ix86_expand_binary_operator (LSHIFTRT, TImode, operands);
+ DONE;
+})
+
+(define_insn "lshrti3_1"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "register_operand" "c")))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; APPLE LOCAL begin mainline 5951842
+;; This pattern must be defined before *lshrti3_2 to prevent the
+;; combine pass from converting sse2_lshrti3 to *lshrti3_2.
+
+(define_insn "sse2_lshrti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "psrldq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end mainline 5951842
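+
+;; Illustrative note (not part of the original sources): psrldq shifts by
+;; bytes, while the RTL shift count above is in bits, hence the
+;; INTVAL (operands[2]) / 8 rescaling.  A hedged C-intrinsics sketch of the
+;; same operation, assuming <emmintrin.h> is available:
+;;
+;;   #include <emmintrin.h>
+;;   __m128i shift_right_16_bits (__m128i x)
+;;   {
+;;     return _mm_srli_si128 (x, 2);   /* byte count: 2 bytes == 16 bits */
+;;   }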
+
+(define_insn "*lshrti3_2"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "O")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "register_operand" "")))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, operands[3], TImode); DONE;")
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;")
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
+
+(define_insn "*lshrdi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrdi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "J,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{q}\t{%2, %0|%0, %2}
+ shr{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrdi3_cmp_one_bit_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrdi3_cconly_one_bit_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrdi3_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "e"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "e"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, operands[3], DImode); DONE;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? flow2_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;")
+
+(define_insn "*lshrsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*lshrsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{l}\t{%2, %0|%0, %2}
+ shr{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{l}\t{%2, %k0|%k0, %2}
+ shr{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrsi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrsi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*lshrsi3_cmp_one_bit_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrsi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_expand "lshrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;")
+
+(define_insn "*lshrhi3_1_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{w}\t{%2, %0|%0, %2}
+ shr{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrhi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_insn "*lshrhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_expand "lshrqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;")
+
+(define_insn "*lshrqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "@
+ shr{b}\t{%2, %0|%0, %2}
+ shr{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ shr{b}\t{%1, %0|%0, %1}
+ shr{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "ishift1")
+ (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrqi2_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (lshiftrt:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrqi2_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrqi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (lshiftrt:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi2_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+;; Rotate instructions
+
+(define_expand "rotldi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ ix86_expand_binary_operator (ROTATE, DImode, operands);
+ DONE;
+ }
+ if (!const_1_to_31_operand (operands[2], VOIDmode))
+ FAIL;
+ emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
+(define_insn_and_split "ix86_rotldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (rotate:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (parallel
+ [(set (match_dup 4)
+ (ior:SI (ashift:SI (match_dup 4) (match_dup 2))
+ (lshiftrt:SI (match_dup 5)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 5)
+ (ior:SI (ashift:SI (match_dup 5) (match_dup 2))
+ (lshiftrt:SI (match_dup 3)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands, 1, operands + 4, operands + 5);")
+
+(define_insn "*rotlsi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{q}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotldi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "e,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)"
+ "@
+ rol{q}\t{%2, %0|%0, %2}
+ rol{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "DI")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;")
+
+(define_insn "*rotlsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, SImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{l}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{l}\t%k0"
+ [(set_attr "type" "rotate")
+ (set_attr "length" "2")])
+
+(define_insn "*rotlsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, SImode, operands)"
+ "@
+ rol{l}\t{%2, %0|%0, %2}
+ rol{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rotlsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (rotate:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+ "@
+ rol{l}\t{%2, %k0|%k0, %2}
+ rol{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;")
+
+(define_insn "*rotlhi3_1_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{w}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, HImode, operands)"
+ "@
+ rol{w}\t{%2, %0|%0, %2}
+ rol{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "HI")])
+
+(define_expand "rotlqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
+
+(define_insn "*rotlqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{b}\t%0"
+ [(set_attr "type" "rotate1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, QImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{b}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ rol{b}\t{%1, %0|%0, %1}
+ rol{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "rotate1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rotlqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, QImode, operands)"
+ "@
+ rol{b}\t{%2, %0|%0, %2}
+ rol{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "QI")])
+
+(define_expand "rotrdi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ ix86_expand_binary_operator (ROTATERT, DImode, operands);
+ DONE;
+ }
+ if (!const_1_to_31_operand (operands[2], VOIDmode))
+ FAIL;
+ emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
+(define_insn_and_split "ix86_rotrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (rotatert:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (parallel
+ [(set (match_dup 4)
+ (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
+ (ashift:SI (match_dup 5)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 5)
+ (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
+ (ashift:SI (match_dup 3)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands, 1, operands + 4, operands + 5);")
+
+(define_insn "*rotrdi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{q}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrdi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "J,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
+ "@
+ ror{q}\t{%2, %0|%0, %2}
+ ror{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "DI")])
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
+
+(define_insn "*rotrsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, SImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{l}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{l}\t%k0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+ "@
+ ror{l}\t{%2, %0|%0, %2}
+ ror{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rotrsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+ "@
+ ror{l}\t{%2, %k0|%k0, %2}
+ ror{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_expand "rotrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;")
+
+(define_insn "*rotrhi3_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, HImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{w}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, HImode, operands)"
+ "@
+ ror{w}\t{%2, %0|%0, %2}
+ ror{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "HI")])
+
+(define_expand "rotrqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;")
+
+(define_insn "*rotrqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, QImode, operands)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{b}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{b}\t%0"
+ [(set_attr "type" "rotate1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, QImode, operands)"
+ "@
+ ror{b}\t{%2, %0|%0, %2}
+ ror{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rotrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ ror{b}\t{%1, %0|%0, %1}
+ ror{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "rotate1")
+ (set_attr "mode" "QI")])
+
+;; Bit set / bit test instructions
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const8_operand" "")
+ (match_operand:SI 3 "const8_operand" "")))]
+ ""
+{
+ /* Handle extractions from %ah et al. */
+ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[1], VOIDmode))
+ FAIL;
+})
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "")
+ (match_operand:SI 2 "const8_operand" "")
+ (match_operand:SI 3 "const8_operand" "")))]
+ ""
+{
+ /* Handle extractions from %ah et al. */
+ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[1], VOIDmode))
+ FAIL;
+})
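+
+;; Illustrative note (not part of the original sources): a hedged C sketch of
+;; the 8-bit-at-offset-8 extraction these expanders target, which x86 can
+;; read straight out of a high byte register (%ah et al.):
+;;
+;;   unsigned second_byte (unsigned x)
+;;   {
+;;     return (x >> 8) & 0xff;   /* candidate for a high-byte move such as
+;;                                  movzbl %ah, %eax, register allocation
+;;                                  permitting */
+;;   }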
+
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "ext_register_operand" "")
+ (match_operand 1 "const8_operand" "")
+ (match_operand 2 "const8_operand" ""))
+ (match_operand 3 "register_operand" ""))]
+ ""
+{
+ /* Handle insertions to %ah et al. */
+ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
+ FAIL;
+
+ /* From mips.md: insert_bit_field doesn't verify that our destination
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[0], VOIDmode))
+ FAIL;
+
+ if (TARGET_64BIT)
+ emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3]));
+ else
+ emit_insn (gen_movsi_insv_1 (operands[0], operands[3]));
+
+ DONE;
+})
+
+;; %%% bts, btr, btc, bt.
+;; In general these instructions are *slow* when applied to memory,
+;; since they enforce atomic operation. When applied to registers,
+;; it depends on the cpu implementation. They're never faster than
+;; the corresponding and/ior/xor operations, so with 32-bit there's
+;; no point. But in 64-bit, we can't hold the relevant immediates
+;; within the instruction itself, so operating on bits in the high
+;; 32-bits of a register becomes easier.
+;;
+;; These are slow on Nocona, but fast on Athlon64. We do require the use
+;; of btrq and btcq for corner cases of post-reload expansion of absdf and
+;; negdf respectively, so they can never be disabled entirely.
+
+(define_insn "*btsq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 1))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "bts{q} %1,%0"
+ [(set_attr "type" "alu1")])
+
+(define_insn "*btrq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "btr{q} %1,%0"
+ [(set_attr "type" "alu1")])
+
+(define_insn "*btcq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "btc{q} %1,%0"
+ [(set_attr "type" "alu1")])
+
+;; Allow Nocona to avoid these instructions if a register is available.
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (lo, hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_iordi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (~lo, ~hi, DImode);
+ if (i >= 32)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_anddi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (not:DI (zero_extract:DI
+ (match_dup 0) (const_int 1) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (lo, hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG.  Generate an equality comparison if `seq' or `sne'.
+
+;; %%% Do the expansion to SImode. If PII, do things the xor+setcc way
+;; to avoid partial register stalls. Otherwise do things the setcc+movzx
+;; way, which can later delete the movzx if only QImode is needed.
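+
+;; Illustrative note (not part of the original sources): a hedged C sketch of
+;; source code that reaches these sCOND expanders -- a comparison used as a
+;; value rather than as a branch condition:
+;;
+;;   int equal_p (int a, int b)
+;;   {
+;;     return a == b;   /* roughly: cmp; sete %al; movzbl %al, %eax */
+;;   }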
+
+(define_expand "seq"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (eq:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;")
+
+(define_expand "sne"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ne:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (NE, operands[0])) DONE; else FAIL;")
+
+(define_expand "sgt"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (gt:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (GT, operands[0])) DONE; else FAIL;")
+
+(define_expand "sgtu"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (GTU, operands[0])) DONE; else FAIL;")
+
+(define_expand "slt"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (lt:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (LT, operands[0])) DONE; else FAIL;")
+
+(define_expand "sltu"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (LTU, operands[0])) DONE; else FAIL;")
+
+(define_expand "sge"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ge:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (GE, operands[0])) DONE; else FAIL;")
+
+(define_expand "sgeu"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (geu:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (GEU, operands[0])) DONE; else FAIL;")
+
+(define_expand "sle"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (le:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (LE, operands[0])) DONE; else FAIL;")
+
+(define_expand "sleu"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (leu:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (LEU, operands[0])) DONE; else FAIL;")
+
+(define_expand "sunordered"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (unordered:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (UNORDERED, operands[0])) DONE; else FAIL;")
+
+(define_expand "sordered"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ordered:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387"
+ "if (ix86_expand_setcc (ORDERED, operands[0])) DONE; else FAIL;")
+
+(define_expand "suneq"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (uneq:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (UNEQ, operands[0])) DONE; else FAIL;")
+
+(define_expand "sunge"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (unge:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (UNGE, operands[0])) DONE; else FAIL;")
+
+(define_expand "sungt"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ungt:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (UNGT, operands[0])) DONE; else FAIL;")
+
+(define_expand "sunle"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (unle:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (UNLE, operands[0])) DONE; else FAIL;")
+
+(define_expand "sunlt"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (unlt:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (UNLT, operands[0])) DONE; else FAIL;")
+
+(define_expand "sltgt"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ltgt:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (LTGT, operands[0])) DONE; else FAIL;")
+
+(define_insn "*setcc_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ ""
+ "set%C1\t%0"
+ [(set_attr "type" "setcc")
+ (set_attr "mode" "QI")])
+
+(define_insn "*setcc_2"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ ""
+ "set%C1\t%0"
+ [(set_attr "type" "setcc")
+ (set_attr "mode" "QI")])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one. Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;; seta %al
+;; testb %al, %al
+;; sete %al
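+
+;; Illustrative note (not part of the original sources): a hedged C sketch of
+;; the shape the splitters below clean up -- a negated comparison result that
+;; would otherwise keep the redundant setcc/test/setcc chain:
+;;
+;;   int not_greater (int a, int b)
+;;   {
+;;     return !(a > b);   /* ideally a single set of the reversed
+;;                           condition (e.g. setle), not seta/testb/sete */
+;;   }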
+
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ne:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (ne:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (eq:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx new_op1 = copy_rtx (operands[1]);
+ operands[1] = new_op1;
+ PUT_MODE (new_op1, QImode);
+ PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+ GET_MODE (XEXP (new_op1, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op1, VOIDmode))
+ FAIL;
+})
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (eq:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx new_op1 = copy_rtx (operands[1]);
+ operands[1] = new_op1;
+ PUT_MODE (new_op1, QImode);
+ PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+ GET_MODE (XEXP (new_op1, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op1, VOIDmode))
+ FAIL;
+})
+
+;; The SSE store flag instructions save 0 or 0xffffffff to the result.
+;; Subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is NaN, but not in normalized form, so we can't represent
+;; it directly.
+
+(define_insn "*sse_setccsf"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (match_operator:SF 1 "sse_comparison_operator"
+ [(match_operand:SF 2 "register_operand" "0")
+ (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE"
+ "cmp%D1ss\t{%3, %0|%0, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
+
+(define_insn "*sse_setccdf"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (match_operator:DF 1 "sse_comparison_operator"
+ [(match_operand:DF 2 "register_operand" "0")
+ (match_operand:DF 3 "nonimmediate_operand" "xm")]))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2"
+ "cmp%D1sd\t{%3, %0|%0, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+;; For all bCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG. Generate an equality comparison if `beq' or `bne'.
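+
+;; Illustrative note (not part of the original sources): a hedged C sketch of
+;; what the bCOND expanders below handle -- an ordinary conditional branch,
+;; which becomes a compare (or test) that sets FLAGS_REG followed by a jcc:
+;;
+;;   extern void do_something (void);   /* hypothetical helper */
+;;   void maybe_call (int a, int b)
+;;   {
+;;     if (a < b)          /* roughly: cmp %esi, %edi ; jge over the call */
+;;       do_something ();
+;;   }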
+
+(define_expand "beq"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (EQ, operands[0]); DONE;")
+
+(define_expand "bne"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (NE, operands[0]); DONE;")
+
+(define_expand "bgt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (GT, operands[0]); DONE;")
+
+(define_expand "bgtu"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (GTU, operands[0]); DONE;")
+
+(define_expand "blt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (LT, operands[0]); DONE;")
+
+(define_expand "bltu"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (LTU, operands[0]); DONE;")
+
+(define_expand "bge"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (GE, operands[0]); DONE;")
+
+(define_expand "bgeu"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (GEU, operands[0]); DONE;")
+
+(define_expand "ble"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (LE, operands[0]); DONE;")
+
+(define_expand "bleu"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (LEU, operands[0]); DONE;")
+
+(define_expand "bunordered"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (UNORDERED, operands[0]); DONE;")
+
+(define_expand "bordered"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (ORDERED, operands[0]); DONE;")
+
+(define_expand "buneq"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (UNEQ, operands[0]); DONE;")
+
+(define_expand "bunge"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (UNGE, operands[0]); DONE;")
+
+(define_expand "bungt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (UNGT, operands[0]); DONE;")
+
+(define_expand "bunle"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (UNLE, operands[0]); DONE;")
+
+(define_expand "bunlt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (UNLT, operands[0]); DONE;")
+
+(define_expand "bltgt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (LTGT, operands[0]); DONE;")
+
+(define_insn "*jcc_1"
+ [(set (pc)
+ (if_then_else (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "%+j%C1\t%l0"
+ [(set_attr "type" "ibr")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
+
+(define_insn "*jcc_2"
+ [(set (pc)
+ (if_then_else (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "%+j%c1\t%l0"
+ [(set_attr "type" "ibr")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
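+
+;; Illustrative note (not part of the original sources): the length attribute
+;; above distinguishes the 2-byte short form (opcode + rel8) from the 6-byte
+;; near form (0x0f 0x8x + rel32).  rel8 reaches about -128..+127 from the end
+;; of the 2-byte insn, i.e. roughly -126..+129 from its own address, which the
+;; condition conservatively tests as -126 <= delta < 128.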
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one. Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;; seta %al
+;; testb %al, %al
+;; je Lfoo
+
+(define_split
+ [(set (pc)
+ (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ PUT_MODE (operands[0], VOIDmode);
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ rtx new_op0 = copy_rtx (operands[0]);
+ operands[0] = new_op0;
+ PUT_MODE (new_op0, VOIDmode);
+ PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
+ GET_MODE (XEXP (new_op0, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op0, VOIDmode))
+ FAIL;
+})
+
+;; Define combination compare-and-branch fp compare instructions to use
+;; during early optimization. Splitting the operation apart early makes
+;; for bad code when we want to reverse the operation.
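+
+;; Illustrative note (not part of the original sources): a hedged C sketch of
+;; an FP compare-and-branch whose reversal is nontrivial because of NaNs,
+;; which is why the compare and the jump are kept fused this early:
+;;
+;;   extern void hit (void);   /* hypothetical helper */
+;;   void fp_branch (double x, double y)
+;;   {
+;;     if (x < y)        /* the inverse is "x >= y or unordered", not ">=" */
+;;       hit ();
+;;   }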
+
+(define_insn "*fp_jcc_1_mixed"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f,x")
+ (match_operand 2 "nonimmediate_operand" "f,xm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_1_sse"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "x")
+ (match_operand 2 "nonimmediate_operand" "xm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_1_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_CMOVE && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_2_mixed"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f,x")
+ (match_operand 2 "nonimmediate_operand" "f,xm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_2_sse"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "x")
+ (match_operand 2 "nonimmediate_operand" "xm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_2_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_CMOVE && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_3_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "nonimmediate_operand" "fm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_4_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "nonimmediate_operand" "fm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_5_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_6_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_7_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" "X")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+;; The order of operands in *fp_jcc_8_387 is forced by combine in its
+;; simplify_comparison () function.  The float operator is treated as RTX_OBJ
+;; with precedence over other operators and is always placed first.  Swap the
+;; condition and operands to match the ficom instruction.
+
+(define_insn "*fp_jcc_8<mode>_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
+ (match_operand 3 "register_operand" "f,f")])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 5 "=a,a"))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
+ && FLOAT_MODE_P (GET_MODE (operands[3]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[3])
+ && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0])))
+ && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (swap_condition (GET_CODE (operands[0])))"
+ "#")
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "nonimmediate_operand" "")])
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3], operands[4], NULL_RTX, NULL_RTX);
+ DONE;
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "general_operand" "")])
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 5 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3], operands[4], operands[5], NULL_RTX);
+ DONE;
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "")])
+ (match_operand 3 "register_operand" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 6 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]);
+ ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+ operands[3], operands[7],
+ operands[4], operands[5], operands[6], NULL_RTX);
+ DONE;
+})
+
+;; %%% Kill this when reload knows how to do it.
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "register_operand" "")])
+ (match_operand 3 "register_operand" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 6 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+ operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]);
+ ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+ operands[3], operands[7],
+ operands[4], operands[5], operands[6], operands[2]);
+ DONE;
+})
+
+;; Unconditional and other jump instructions
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jmp\t%l0"
+ [(set_attr "type" "ibr")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 5)))
+ (set_attr "modrm" "0")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))]
+ ""
+ "")
+
+(define_insn "*indirect_jump"
+ [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))]
+ "!TARGET_64BIT"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*indirect_jump_rtx64"
+ [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))]
+ "TARGET_64BIT"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+  /* In PIC mode, the table entries are stored GOT-relative (32-bit) or
+     PC-relative (64-bit).  Convert the relative address to an absolute
+     address.  */
+ if (flag_pic)
+ {
+ rtx op0, op1;
+ enum rtx_code code;
+
+ if (TARGET_64BIT)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+ }
+ else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = pic_offset_table_rtx;
+ }
+ else
+ {
+ code = MINUS;
+ op0 = pic_offset_table_rtx;
+ op1 = operands[0];
+ }
+
+ operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ }
+})
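+
+;; A rough sketch of the address computation above (illustrative only, with
+;; "entry" denoting the value loaded from the jump table):
+;;
+;;	64-bit:                         target = entry + &table   (PC-relative entries)
+;;	32-bit, GOTOFF/Mach-O entries:  target = entry + PIC base register
+;;	32-bit otherwise:               target = PIC base register - entry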
+
+(define_insn "*tablejump_1"
+ [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "!TARGET_64BIT"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*tablejump_1_rtx64"
+ [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_64BIT"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
+
+(define_peephole2
+ [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+ (set (match_operand:QI 1 "register_operand" "")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (set (match_operand 3 "q_regs_operand" "")
+ (zero_extend (match_dup 1)))]
+ "(peep2_reg_dead_p (3, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 2))]
+{
+ operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+ operands[5] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
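+
+;; For example (illustrative register choice), the peephole above turns
+;;
+;;	cmpl	%edx, %ecx
+;;	sete	%al
+;;	movzbl	%al, %eax
+;;
+;; into
+;;
+;;	xorl	%eax, %eax
+;;	cmpl	%edx, %ecx
+;;	sete	%al
+;;
+;; clearing the destination before the flags are set (which is why operand 3
+;; must not overlap operand 0) and thereby avoiding the partial-register
+;; stall of the zero extension.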
+
+;; Similar, but match zero_extendhisi2_and, which adds a clobber.
+
+(define_peephole2
+ [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+ (set (match_operand:QI 1 "register_operand" "")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (parallel [(set (match_operand 3 "q_regs_operand" "")
+ (zero_extend (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(peep2_reg_dead_p (3, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 2))]
+{
+ operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+ operands[5] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
+
+;; Call instructions.
+
+;; The predicates normally associated with named expanders are not properly
+;; checked for calls. This is a bug in the generic code, but it isn't that
+;; easy to fix. Ignore it for now and be prepared to fix things up.
+
+;; Call subroutine returning no value.
+
+(define_expand "call_pop"
+ [(parallel [(call (match_operand:QI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "" "")))])]
+ "!TARGET_64BIT"
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0);
+ DONE;
+})
+
+(define_insn "*call_pop_0"
+ [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_pop_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "!TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ {
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+ }
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%A0";
+ else
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1);
+ DONE;
+})
+
+(define_insn "*call_0"
+ [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))]
+ ""
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))]
+ "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a"))
+ (match_operand 1 "" ""))]
+ "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "jmp\t%P0";
+ return "jmp\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t%P0"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64_v"
+ [(call (mem:QI (reg:DI 40))
+ (match_operand 0 "" ""))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t*%%r11"
+ [(set_attr "type" "call")])
+
+
+;; Call subroutine, returning value in operand 0
+
+(define_expand "call_value_pop"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 4 "" "")))])]
+ "!TARGET_64BIT"
+{
+ ix86_expand_call (operands[0], operands[1], operands[2],
+ operands[3], operands[4], 0);
+ DONE;
+})
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 2 not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 2 not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1);
+ DONE;
+})
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ int i;
+
+ /* In order to give reg-stack an easier job in validating two
+ coprocessor registers as containing a possible return value,
+ simply pretend the untyped call returns a complex long double
+ value. */
+
+ ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
+ ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
+ operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1),
+ NULL, 0);
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage (const0_rtx));
+
+ DONE;
+})
+
+;; Prologue and epilogue instructions
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Insn emitted into the body of a function to return from a function.
+;; This is only done if the function's epilogue is known to be simple.
+;; See comments for ix86_can_use_return_insn_p in i386.c.
+
+(define_expand "return"
+ [(return)]
+ "ix86_can_use_return_insn_p ()"
+{
+ if (current_function_pops_args)
+ {
+ rtx popc = GEN_INT (current_function_pops_args);
+ emit_jump_insn (gen_return_pop_internal (popc));
+ DONE;
+ }
+})
+
+(define_insn "return_internal"
+ [(return)]
+ "reload_completed"
+ "ret"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Used by x86_machine_dependent_reorg to avoid the penalty that Athlon and
+;; K8 incur on a single-byte RET instruction.
+
+(define_insn "return_internal_long"
+ [(return)
+ (unspec [(const_int 0)] UNSPEC_REP)]
+ "reload_completed"
+ "rep {;} ret"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "modrm" "0")])
+
+(define_insn "return_pop_internal"
+ [(return)
+ (use (match_operand:SI 0 "const_int_operand" ""))]
+ "reload_completed"
+ "ret\t%0"
+ [(set_attr "length" "3")
+ (set_attr "length_immediate" "2")
+ (set_attr "modrm" "0")])
+
+(define_insn "return_indirect_internal"
+ [(return)
+ (use (match_operand:SI 0 "register_operand" "r"))]
+ "reload_completed"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Align to 16-byte boundary, max skip in op0. Used to avoid
+;; branch prediction penalty for the third jump in a 16-byte
+;; block on K8.
+
+(define_insn "align"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+ ""
+{
+#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
+#else
+  /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
+     The align insn is used to avoid three jump instructions in a row, which
+     improves branch prediction, and that benefit hardly outweighs the cost of
+     the extra 8 nops (on average) inserted by a full alignment pseudo
+     operation. */
+#endif
+ return "";
+}
+ [(set_attr "length" "16")])
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "ix86_expand_prologue (); DONE;")
+
+(define_insn "set_got"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ { return output_set_got (operands[0], NULL_RTX); }
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "set_got_labelled"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(label_ref (match_operand 1 "" ""))]
+ UNSPEC_SET_GOT))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ { return output_set_got (operands[0], operands[1]); }
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "set_got_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
+ "TARGET_64BIT"
+ "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0"
+ [(set_attr "type" "lea")
+ (set_attr "length" "6")])
+
+(define_expand "epilogue"
+ [(const_int 1)]
+ ""
+ "ix86_expand_epilogue (1); DONE;")
+
+(define_expand "sibcall_epilogue"
+ [(const_int 1)]
+ ""
+ "ix86_expand_epilogue (0); DONE;")
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
+
+ /* Tricky bit: we write the address of the handler to which we will
+ be returning into someone else's stack frame, one word below the
+ stack address we wish to restore. */
+ tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
+ tmp = plus_constant (tmp, -UNITS_PER_WORD);
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (tmp, ra);
+
+ if (Pmode == SImode)
+ emit_jump_insn (gen_eh_return_si (sa));
+ else
+ emit_jump_insn (gen_eh_return_di (sa));
+ emit_barrier ();
+ DONE;
+})
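+
+;; Roughly (an informal reading of the expander above): with sa the stack
+;; adjustment and ra the handler address, the store amounts to
+;;
+;;	*(restored_sp - UNITS_PER_WORD) = ra;   where restored_sp = arg_pointer + sa
+;;
+;; so once ix86_expand_epilogue (2) (reached via the eh_return_si/di
+;; splitters below) has brought the stack pointer down to that slot, the
+;; final ret pops the handler address and leaves the stack pointer at the
+;; value we wished to restore.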
+
+(define_insn_and_split "eh_return_si"
+ [(set (pc)
+ (unspec [(match_operand:SI 0 "register_operand" "c")]
+ UNSPEC_EH_RETURN))]
+ "!TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(const_int 1)]
+ "ix86_expand_epilogue (2); DONE;")
+
+(define_insn_and_split "eh_return_di"
+ [(set (pc)
+ (unspec [(match_operand:DI 0 "register_operand" "c")]
+ UNSPEC_EH_RETURN))]
+ "TARGET_64BIT"
+ "#"
+ "reload_completed"
+ [(const_int 1)]
+ "ix86_expand_epilogue (2); DONE;")
+
+(define_insn "leave"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
+ (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "leave"
+ [(set_attr "type" "leave")])
+
+(define_insn "leave_rex64"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
+ (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "leave"
+ [(set_attr "type" "leave")])
+
+(define_expand "ffssi2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:SI 2 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn_and_split "*ffs_cmove"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:SI 2 "=&r"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (ctz:SI (match_dup 1)))])
+ (set (match_dup 0) (if_then_else:SI
+ (eq (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_dup 2)
+ (match_dup 0)))
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn_and_split "*ffs_no_cmove"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:SI 2 "=&q"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (ctz:SI (match_dup 1)))])
+ (set (strict_low_part (match_dup 3))
+ (eq:QI (reg:CCZ FLAGS_REG) (const_int 0)))
+ (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[3] = gen_lowpart (QImode, operands[2]);
+ ix86_expand_clear (operands[2]);
+})
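+
+;; Informally, both splits above compute ffs (x) = (x ? ctz (x) + 1 : 0).
+;; The CMOV form does roughly (illustrative register choice)
+;;
+;;	movl	$-1, %edx
+;;	bsfl	%ecx, %eax	; ZF set when %ecx == 0, else %eax = ctz
+;;	cmove	%edx, %eax	; %eax = -1 when the input was zero
+;;	addl	$1, %eax
+;;
+;; while the no-CMOV form materializes ZF as 0 or -1 via setcc/neg and ORs
+;; it into the bsf result before the final increment, so a zero input still
+;; yields 0 without relying on bsf's undefined result in that case.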
+
+(define_insn "*ffssi_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (ctz:SI (match_dup 1)))]
+ ""
+ "bsf{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_expand "ffsdi2"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && TARGET_CMOVE"
+ "")
+
+(define_insn_and_split "*ffs_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:DI 2 "=&r"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (ctz:DI (match_dup 1)))])
+ (set (match_dup 0) (if_then_else:DI
+ (eq (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_dup 2)
+ (match_dup 0)))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn "*ffsdi_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (ctz:DI (match_dup 1)))]
+ "TARGET_64BIT"
+ "bsf{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_insn "ctzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsf{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_insn "ctzdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "bsf{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_expand "clzsi2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
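+
+;; As a sanity check of the expansion above: for a non-zero input the bsr
+;; pattern below yields 31 - clz (x), and since that value is in the range
+;; 0..31, XORing it with 31 is the same as subtracting it from 31, so the
+;; second step recovers clz (x) without needing an extra register.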
+
+(define_insn "*bsr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+;; APPLE LOCAL begin mainline bswap
+(define_insn "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (bswap:SI (match_operand:SI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BSWAP"
+ "bswap\t%k0"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "length" "2")])
+
+(define_insn "bswapdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (bswap:DI (match_operand:DI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_BSWAP"
+ "bswap\t%0"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "length" "3")])
+;; APPLE LOCAL end mainline bswap
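+
+;; For reference: bswap reverses the byte order of its operand, e.g.
+;; __builtin_bswap32 (0x11223344) yields 0x44332211; the instruction first
+;; appeared on the 486, hence the TARGET_BSWAP guard.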
+
+(define_expand "clzdi2"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (const_int 63)
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*bsr_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (const_int 63)
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "bsr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+;; Thread-local storage patterns for ELF.
+;;
+;; Note that these code sequences must appear exactly as shown
+;; in order to allow linker relaxation.
+
+(define_insn "*tls_global_dynamic_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "*tls_global_dynamic_32_sun"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_SUN_TLS"
+ "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]}
+ push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop"
+ [(set_attr "type" "multi")
+ (set_attr "length" "14")])
+
+(define_expand "tls_global_dynamic_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI
+ [(match_dup 2)
+ (match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_dup 3)]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (flag_pic)
+ operands[2] = pic_offset_table_rtx;
+ else
+ {
+ operands[2] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[2]));
+ }
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32
+ (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ operands[3] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_global_dynamic_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
+ (match_operand:DI 3 "" "")))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)]
+ "TARGET_64BIT"
+ ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")])
+
+(define_expand "tls_global_dynamic_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call:DI (mem:QI (match_dup 2)) (const_int 0)))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)])]
+ ""
+{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64
+ (operands[0], operands[1]));
+ DONE;
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "11")])
+
+(define_insn "*tls_local_dynamic_base_32_sun"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_SUN_TLS"
+ "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]}
+ push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "13")])
+
+(define_expand "tls_local_dynamic_base_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 2)]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (flag_pic)
+ operands[1] = pic_offset_table_rtx;
+ else
+ {
+ operands[1] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[1]));
+ }
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32
+ (operands[0], ix86_tls_module_base (), operands[1]));
+ DONE;
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+ (match_operand:DI 2 "" "")))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+ "TARGET_64BIT"
+ "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_expand "tls_local_dynamic_base_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call:DI (mem:QI (match_dup 1)) (const_int 0)))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
+ ""
+{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64
+ (operands[0], ix86_tls_module_base ()));
+ DONE;
+ }
+ operands[1] = ix86_tls_get_addr ();
+})
+
+;; Local dynamic TLS for a single variable is a loss.  Show combine how
+;; to convert it back to global dynamic.
+
+(define_insn_and_split "*tls_local_dynamic_32_once"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE)
+ (const:SI (unspec:SI
+ [(match_operand:SI 3 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 0)
+ (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+ UNSPEC_TLS_GD))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; Load and add the thread base pointer from %gs:0.
+
+(define_insn "*load_tp_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_TP))]
+ "!TARGET_64BIT"
+ "mov{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+ (match_operand:SI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "add{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*load_tp_di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_TP))]
+ "TARGET_64BIT"
+ "mov{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+ (match_operand:DI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "add{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
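+
+;; Illustrative only: these provide the thread pointer for TP-relative TLS
+;; accesses.  For example, taking the address of a thread-local variable x
+;; under the 32-bit local-exec model becomes something like
+;;
+;;	movl	%gs:0, %eax
+;;	leal	x@ntpoff(%eax), %eax
+;;
+;; with the add form used instead when the load can be folded into an
+;; existing address computation.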
+
+;; GNU2 TLS patterns can be split.
+
+(define_expand "tls_dynamic_gnu2_32"
+ [(set (match_dup 3)
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (const:SI
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))))
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 3)
+ (match_dup 2) (reg:SI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "b")
+ (const:SI
+ (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_operand:SI 2 "register_operand" "0")
+ ;; we have to make sure %ebx still points to the GOT
+ (match_operand:SI 3 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+ [(set (match_operand:SI 0 "register_operand" "=&a")
+ (plus:SI
+ (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "")
+ (match_operand:SI 4 "" "")
+ (match_operand:SI 2 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC)
+ (const:SI (unspec:SI
+ [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 5))]
+{
+ operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+ [(set (match_dup 2)
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))
+ (parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "DI")
+ (set_attr "length" "7")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+ (match_operand:DI 2 "register_operand" "0")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+ [(set (match_operand:DI 0 "register_operand" "=&a")
+ (plus:DI
+ (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+ (match_operand:DI 3 "" "")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC)
+ (const:DI (unspec:DI
+ [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 4))]
+{
+ operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;;
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3. There are three patterns for each of DFmode and
+;; SFmode. The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion. The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target mode
+;; is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address instructions,
+;; so use special patterns for add and mul.
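+
+;; For example (illustrative), a function such as
+;;
+;;	double f (double a, int b) { return a * b; }
+;;
+;; can match one of the "conversion" forms below (here *fop_df_2si_i387),
+;; folding the int-to-double conversion into a single fimul from memory
+;; instead of an explicit fild followed by fmul.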
+
+(define_insn "*fop_sf_comm_mixed"
+ [(set (match_operand:SF 0 "register_operand" "=f,x")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SF 2 "nonimmediate_operand" "fm,xm")]))]
+ "TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_sf_comm_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "%0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_sf_comm_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "%0")
+ (match_operand:SF 2 "nonimmediate_operand" "fm")]))]
+ "TARGET_80387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_sf_1_mixed"
+ [(set (match_operand:SF 0 "register_operand" "=f,f,x")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0")
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm")]))]
+ "TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:SF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:SF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_sf_1_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
+ (set_attr "mode" "SF")])
+
+;; This pattern is not fully shadowed by the pattern above.
+(define_insn "*fop_sf_1_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "0,fm")
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))]
+ "TARGET_80387 && !TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+;; ??? Add SSE splitters for these!
+(define_insn "*fop_sf_2<mode>_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(float:SF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:SF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_sf_3<mode>_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "register_operand" "0,0")
+ (float:SF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_df_comm_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=f,x")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DF 2 "nonimmediate_operand" "fm,Ym")]))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "mode" "DF")])
+
+(define_insn "*fop_df_comm_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "%0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "mode" "DF")])
+
+(define_insn "*fop_df_comm_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "%0")
+ (match_operand:DF 2 "nonimmediate_operand" "fm")]))]
+ "TARGET_80387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "DF")])
+
+(define_insn "*fop_df_1_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=f,f,x")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0")
+ (match_operand:DF 2 "nonimmediate_operand" "fm,0,xm")]))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:DF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:DF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "DF")])
+
+(define_insn "*fop_df_1_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set_attr "mode" "DF")
+ (set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))])
+
+;; This pattern is not fully shadowed by the pattern above.
+(define_insn "*fop_df_1_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "0,fm")
+ (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "DF")])
+
+;; ??? Add SSE splitters for these!
+(define_insn "*fop_df_2<mode>_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float:DF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:DF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_df_3<mode>_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "register_operand" "0,0")
+ (float:DF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_df_4_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+ (match_operand:DF 2 "register_operand" "0,f")]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_5_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "register_operand" "0,f")
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_6_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "register_operand" "0,f"))
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_xf_comm_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "%0")
+ (match_operand:XF 2 "register_operand" "f")]))]
+ "TARGET_80387
+ && COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_1_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,f")
+ (match_operand:XF 2 "register_operand" "f,0")]))]
+ "TARGET_80387
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_2<mode>_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float:XF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:XF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_3<mode>_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,0")
+ (float:XF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float_extend:XF (match_operand 1 "nonimmediate_operand" "fm,0"))
+ (match_operand:XF 2 "register_operand" "0,f")]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_xf_5_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,f")
+ (float_extend:XF
+ (match_operand 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_xf_6_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float_extend:XF
+ (match_operand 1 "register_operand" "0,f"))
+ (float_extend:XF
+ (match_operand 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "binary_fp_operator"
+ [(float (match_operand:X87MODEI12 1 "register_operand" ""))
+ (match_operand 2 "register_operand" "")]))]
+ "TARGET_80387 && reload_completed
+ && FLOAT_MODE_P (GET_MODE (operands[0]))"
+ [(const_int 0)]
+{
+ operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
+ operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (GET_CODE (operands[3]),
+ GET_MODE (operands[3]),
+ operands[4],
+ operands[2])));
+ ix86_free_from_memory (GET_MODE (operands[1]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "binary_fp_operator"
+ [(match_operand 1 "register_operand" "")
+ (float (match_operand:X87MODEI12 2 "register_operand" ""))]))]
+ "TARGET_80387 && reload_completed
+ && FLOAT_MODE_P (GET_MODE (operands[0]))"
+ [(const_int 0)]
+{
+ operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+ operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (GET_CODE (operands[3]),
+ GET_MODE (operands[3]),
+ operands[1],
+ operands[4])));
+ ix86_free_from_memory (GET_MODE (operands[2]));
+ DONE;
+})
+
+;; FPU special functions.
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_FANCY_MATH_387 || TARGET_SSE_MATH"
+{
+ if (!TARGET_SSE_MATH)
+ operands[1] = force_reg (SFmode, operands[1]);
+})
+
+(define_insn "*sqrtsf2_mixed"
+ [(set (match_operand:SF 0 "register_operand" "=f,x")
+ (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm")))]
+ "TARGET_USE_FANCY_MATH_387 && TARGET_MIX_SSE_I387"
+ "@
+ fsqrt
+ sqrtss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fpspc,sse")
+ (set_attr "mode" "SF,SF")
+ (set_attr "athlon_decode" "direct,*")])
+
+(define_insn "*sqrtsf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH"
+ "sqrtss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "*")])
+
+(define_insn "*sqrtsf2_i387"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_expand "sqrtdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_FANCY_MATH_387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ if (!(TARGET_SSE2 && TARGET_SSE_MATH))
+ operands[1] = force_reg (DFmode, operands[1]);
+})
+
+(define_insn "*sqrtdf2_mixed"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=f,x")
+ (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0,xm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_USE_FANCY_MATH_387 && TARGET_SSE2 && TARGET_MIX_SSE_I387"
+ "@
+ fsqrt
+ sqrtsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fpspc,sse")
+ (set_attr "mode" "DF,DF")
+ (set_attr "athlon_decode" "direct,*")])
+
+(define_insn "*sqrtdf2_sse"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "sqrtsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "*")])
+
+(define_insn "*sqrtdf2_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_insn "*sqrtextendsfdf2_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (sqrt:DF (float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_insn "sqrtxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_insn "*sqrtextendsfxf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF (float_extend:XF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_insn "*sqrtextenddfxf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF (float_extend:XF
+ (match_operand:DF 1 "register_operand" "0"))))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_insn "fpremxf4"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM_U))
+ (set (reg:CCFP FPSR_REG)
+ (unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fprem"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "fmodsf3"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:SF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_extendsfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "fmoddf3"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:DF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "fmodxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ emit_label (label);
+
+ emit_insn (gen_fpremxf4 (operands[1], operands[2],
+ operands[1], operands[2]));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
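fprem produces only a partial remainder, so these fmod expanders loop:
ix86_emit_fp_unordered_jump branches back to the label while fprem reports
an incomplete reduction in the FPU status word, and the loop exits once the
final remainder is available.  The drem expanders below do the same with
fprem1, which uses IEEE round-to-nearest remainder semantics.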
+
+(define_insn "fprem1xf4"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM1_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM1_U))
+ (set (reg:CCFP FPSR_REG)
+ (unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fprem1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "dremsf3"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:SF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_extendsfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "dremdf3"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:DF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "dremxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4 (operands[1], operands[2],
+ operands[1], operands[2]));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*sindf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_insn "*sinsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "SF")])
+
+(define_insn "*sinextendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))]
+ UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_insn "*sinxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cosdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_insn "*cossf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "SF")])
+
+(define_insn "*cosextendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))]
+ UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_insn "*cosxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+;; With the sincos pattern defined, the sin and cos builtin functions
+;; will be expanded to the sincos pattern with one of its outputs left
+;; unused.  The CSE pass will detect whether two sincos patterns can be
+;; combined; otherwise the sincos pattern will be split back into a sin
+;; or cos pattern, depending on which output is unused.
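A minimal C sketch of the case this comment describes, assuming
-funsafe-math-optimizations and x87 math (-mfpmath=387); the function name
is illustrative only:

    #include <math.h>

    /* Both calls take the same argument, so once each builtin has been
       expanded to a sincos pattern, CSE can merge the two and a single
       fsincos supplies both results.  If only one of sin/cos were used,
       the splits below would turn the pattern back into a plain fsin or
       fcos.  */
    void
    polar_to_cart (double r, double theta, double *x, double *y)
    {
      *x = r * cos (theta);
      *y = r * sin (theta);
    }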
+
+(define_insn "sincosdf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 2 "register_operand" "0")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:DF 1 "register_operand" "=u")
+ (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_operand:DF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:DF 1 "register_operand" "")
+ (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 1) (unspec:DF [(match_dup 2)] UNSPEC_SIN))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_operand:DF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:DF 1 "register_operand" "")
+ (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 0) (unspec:DF [(match_dup 2)] UNSPEC_COS))]
+ "")
+
+(define_insn "sincossf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 2 "register_operand" "0")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:SF 1 "register_operand" "=u")
+ (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_operand:SF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:SF 1 "register_operand" "")
+ (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 1) (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
+ "")
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_operand:SF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:SF 1 "register_operand" "")
+ (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 0) (unspec:SF [(match_dup 2)] UNSPEC_COS))]
+ "")
+
+(define_insn "*sincosextendsfdf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(float_extend:DF
+ (match_operand:SF 2 "register_operand" "0"))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:DF 1 "register_operand" "=u")
+ (unspec:DF [(float_extend:DF
+ (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(float_extend:DF
+ (match_operand:SF 2 "register_operand" ""))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:DF 1 "register_operand" "")
+ (unspec:DF [(float_extend:DF
+ (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 1) (unspec:DF [(float_extend:DF
+ (match_dup 2))] UNSPEC_SIN))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(float_extend:DF
+ (match_operand:SF 2 "register_operand" ""))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:DF 1 "register_operand" "")
+ (unspec:DF [(float_extend:DF
+ (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 0) (unspec:DF [(float_extend:DF
+ (match_dup 2))] UNSPEC_COS))]
+ "")
+
+(define_insn "sincosxf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]
+ "")
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !reload_completed && !reload_in_progress"
+ [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]
+ "")
+
+(define_insn "*tandf3_1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 2 "register_operand" "0")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:DF 1 "register_operand" "=u")
+ (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+;; optimize sequence: fptan
+;; fstp %st(0)
+;; fld1
+;; into fptan insn.
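The peephole works because fptan replaces %st(0) with tan(x) and then pushes
the constant 1.0; when the generated assembly pops that 1.0 (fstp %st(0))
only to reload the same constant (fld1) into the same register, the pattern
below keeps the value fptan already left on the stack and drops the
redundant load.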
+
+(define_peephole2
+ [(parallel[(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_operand:DF 2 "register_operand" "")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:DF 1 "register_operand" "")
+ (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))])
+ (set (match_dup 0)
+ (match_operand:DF 3 "immediate_operand" ""))]
+ "standard_80387_constant_p (operands[3]) == 2"
+ [(parallel[(set (match_dup 0) (unspec:DF [(match_dup 2)] UNSPEC_TAN_ONE))
+ (set (match_dup 1) (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))])]
+ "")
+
+(define_expand "tandf2"
+ [(parallel [(set (match_dup 2)
+ (unspec:DF [(match_operand:DF 1 "register_operand" "")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_dup 1)] UNSPEC_TAN_TAN))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (DFmode);
+})
+
+(define_insn "*tansf3_1"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 2 "register_operand" "0")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:SF 1 "register_operand" "=u")
+ (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "SF")])
+
+;; optimize sequence: fptan
+;; fstp %st(0)
+;; fld1
+;; into fptan insn.
+
+(define_peephole2
+ [(parallel[(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_operand:SF 2 "register_operand" "")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:SF 1 "register_operand" "")
+ (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))])
+ (set (match_dup 0)
+ (match_operand:SF 3 "immediate_operand" ""))]
+ "standard_80387_constant_p (operands[3]) == 2"
+ [(parallel[(set (match_dup 0) (unspec:SF [(match_dup 2)] UNSPEC_TAN_ONE))
+ (set (match_dup 1) (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))])]
+ "")
+
+(define_expand "tansf2"
+ [(parallel [(set (match_dup 2)
+ (unspec:SF [(match_operand:SF 1 "register_operand" "")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_dup 1)] UNSPEC_TAN_TAN))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (SFmode);
+})
+
+(define_insn "*tanxf3_1"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+;; optimize sequence: fptan
+;; fstp %st(0)
+;; fld1
+;; into fptan insn.
+
+(define_peephole2
+ [(parallel[(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))])
+ (set (match_dup 0)
+ (match_operand:XF 3 "immediate_operand" ""))]
+ "standard_80387_constant_p (operands[3]) == 2"
+ [(parallel[(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_TAN_ONE))
+ (set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))])]
+ "")
+
+(define_expand "tanxf2"
+ [(parallel [(set (match_dup 2)
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_TAN_ONE))
+ (set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1)] UNSPEC_TAN_TAN))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+})
+
+(define_insn "atan2df3_1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 2 "register_operand" "0")
+ (match_operand:DF 1 "register_operand" "u")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:DF 3 "=1"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")])
+
+(define_expand "atan2df3"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 2 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx copy = gen_reg_rtx (DFmode);
+ emit_move_insn (copy, operands[1]);
+ emit_insn (gen_atan2df3_1 (operands[0], copy, operands[2]));
+ DONE;
+})
+
+(define_expand "atandf2"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_dup 2)
+ (match_operand:DF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:DF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (DFmode);
+ emit_move_insn (operands[2], CONST1_RTX (DFmode)); /* fld1 */
+})
+
+(define_insn "atan2sf3_1"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 2 "register_operand" "0")
+ (match_operand:SF 1 "register_operand" "u")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:SF 3 "=1"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "SF")])
+
+(define_expand "atan2sf3"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 2 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx copy = gen_reg_rtx (SFmode);
+ emit_move_insn (copy, operands[1]);
+ emit_insn (gen_atan2sf3_1 (operands[0], copy, operands[2]));
+ DONE;
+})
+
+(define_expand "atansf2"
+ [(parallel [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_dup 2)
+ (match_operand:SF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:SF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (SFmode);
+ emit_move_insn (operands[2], CONST1_RTX (SFmode)); /* fld1 */
+})
+
+(define_insn "atan2xf3_1"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 1 "register_operand" "u")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 "=1"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "atan2xf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx copy = gen_reg_rtx (XFmode);
+ emit_move_insn (copy, operands[1]);
+ emit_insn (gen_atan2xf3_1 (operands[0], copy, operands[2]));
+ DONE;
+})
+
+(define_expand "atanxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 2)
+ (match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "asindf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2)))
+ (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3)))
+ (set (match_dup 6) (sqrt:XF (match_dup 5)))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 6) (match_dup 2)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 8 ""))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<8; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "asinsf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2)))
+ (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3)))
+ (set (match_dup 6) (sqrt:XF (match_dup 5)))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 6) (match_dup 2)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 8 ""))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<8; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "asinxf2"
+ [(set (match_dup 2)
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 1)))
+ (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+ (set (match_dup 5) (sqrt:XF (match_dup 4)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 5) (match_dup 1)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 6 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<6; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "acosdf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2)))
+ (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3)))
+ (set (match_dup 6) (sqrt:XF (match_dup 5)))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 2) (match_dup 6)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 8 ""))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<8; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "acossf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2)))
+ (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3)))
+ (set (match_dup 6) (sqrt:XF (match_dup 5)))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 2) (match_dup 6)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 8 ""))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<8; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "acosxf2"
+ [(set (match_dup 2)
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 1)))
+ (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+ (set (match_dup 5) (sqrt:XF (match_dup 4)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1) (match_dup 5)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 6 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<6; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
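These asin/acos expanders rely on the standard identities rather than a
dedicated instruction: with s = sqrt(1 - x*x), asin(x) is computed as
atan(x/s) and acos(x) as atan(s/x), the division being folded into the
final fpatan step (which takes the arctangent of a quotient of its two
stack operands).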
+
+(define_insn "fyl2x_xf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 1 "register_operand" "u")]
+ UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 "=1"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fyl2x"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "logsf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (parallel [(set (match_dup 4)
+ (unspec:XF [(match_dup 2)
+ (match_dup 3)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 5 ""))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+
+ temp = standard_80387_constant_rtx (4); /* fldln2 */
+ emit_move_insn (operands[3], temp);
+})
+
+(define_expand "logdf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (parallel [(set (match_dup 4)
+ (unspec:XF [(match_dup 2)
+ (match_dup 3)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 5 ""))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+
+ temp = standard_80387_constant_rtx (4); /* fldln2 */
+ emit_move_insn (operands[3], temp);
+})
+
+(define_expand "logxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+
+ operands[2] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (4); /* fldln2 */
+ emit_move_insn (operands[2], temp);
+})
+
+(define_expand "log10sf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (parallel [(set (match_dup 4)
+ (unspec:XF [(match_dup 2)
+ (match_dup 3)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 5 ""))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+
+ temp = standard_80387_constant_rtx (3); /* fldlg2 */
+ emit_move_insn (operands[3], temp);
+})
+
+(define_expand "log10df2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (parallel [(set (match_dup 4)
+ (unspec:XF [(match_dup 2)
+ (match_dup 3)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 5 ""))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+
+ temp = standard_80387_constant_rtx (3); /* fldlg2 */
+ emit_move_insn (operands[3], temp);
+})
+
+(define_expand "log10xf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+
+ operands[2] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (3); /* fldlg2 */
+ emit_move_insn (operands[2], temp);
+})
+
+(define_expand "log2sf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (parallel [(set (match_dup 4)
+ (unspec:XF [(match_dup 2)
+ (match_dup 3)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 5 ""))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "log2df2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (parallel [(set (match_dup 4)
+ (unspec:XF [(match_dup 2)
+ (match_dup 3)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 5 ""))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "log2xf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
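All of the logarithm expanders above funnel through fyl2x, which computes
y * log2(x): loading ln(2) as y (fldln2) yields the natural logarithm,
loading log10(2) (fldlg2) yields log10, and loading 1.0 (fld1) yields log2
directly.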
+
+(define_insn "fyl2xp1_xf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 1 "register_operand" "u")]
+ UNSPEC_FYL2XP1))
+ (clobber (match_scratch:XF 3 "=1"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fyl2xp1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "log1psf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ ix86_emit_i387_log1p (op0, op1);
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "log1pdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ ix86_emit_i387_log1p (op0, op1);
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "log1pxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ ix86_emit_i387_log1p (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*fxtractxf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fxtract"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "logbsf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (parallel [(set (match_dup 3)
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_FRACT))
+ (set (match_dup 4)
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "logbdf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (parallel [(set (match_dup 3)
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_FRACT))
+ (set (match_dup 4)
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 4)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "logbxf2"
+ [(parallel [(set (match_dup 2)
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ilogbsi2"
+ [(parallel [(set (match_dup 2)
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 3 "register_operand" "")
+ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (fix:SI (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (XFmode);
+})
+
+(define_insn "*f2xm1xf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_F2XM1))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "f2xm1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*fscalexf4"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fscale"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "expsf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 10)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<12; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expdf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 10)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<12; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expxf2"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 2)))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<10; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[2], temp);
+ emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */
+})
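The exp expanders use the classic x87 decomposition: with t = x * log2(e)
(the fldl2e constant), n = frndint(t) and f = t - n, exp(x) = 2^t =
2^n * 2^f, where f2xm1 computes 2^f - 1 (f stays within the instruction's
+/-1 range), the fld1 constant restores the missing 1, and fscale applies
the 2^n factor.  The exp10 and exp2 expanders below follow the same scheme,
substituting fldl2t (log2(10)) or no scaling constant at all.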
+
+(define_expand "exp10sf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 10)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<12; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (6); /* fldl2t */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp10df2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 10)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<12; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (6); /* fldl2t */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp10xf2"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 2)))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<10; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (6); /* fldl2t */
+ emit_move_insn (operands[2], temp);
+ emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp2sf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 3) (unspec:XF [(match_dup 2)] UNSPEC_FRNDINT))
+ (set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
+ (set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 8)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<10; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp2df2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 3) (unspec:XF [(match_dup 2)] UNSPEC_FRNDINT))
+ (set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
+ (set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 8)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<10; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "exp2xf2"
+ [(set (match_dup 2) (match_operand:XF 1 "register_operand" ""))
+ (set (match_dup 3) (unspec:XF [(match_dup 2)] UNSPEC_FRNDINT))
+ (set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
+ (set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=2; i<9; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1df2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 11)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 12)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+ (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 14)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<15; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[10], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1sf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 11)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 12)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+ (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 14)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<15; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[10], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1xf2"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 2)))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 8)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
+ (set (match_operand:XF 0 "register_operand" "")
+ (plus:XF (match_dup 12) (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<13; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[2], temp);
+ emit_move_insn (operands[9], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "ldexpdf3"
+ [(set (match_dup 3)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 4)
+ (float:XF (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_dup 5)
+ (unspec:XF [(match_dup 3) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 6)
+ (unspec:XF [(match_dup 3) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 5)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=3; i<7; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexpsf3"
+ [(set (match_dup 3)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 4)
+ (float:XF (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_dup 5)
+ (unspec:XF [(match_dup 3) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 6)
+ (unspec:XF [(match_dup 3) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 5)))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=3; i<7; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexpxf3"
+ [(set (match_dup 3)
+ (float:XF (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_operand:XF 0 " register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 4)
+ (unspec:XF [(match_dup 1) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ for (i=3; i<5; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+})
+
+
+(define_insn "frndintxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "frndint"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "rintdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2 (op0, op1));
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "rintsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2 (op0, op1));
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "rintxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn_and_split "*fistdi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fistdi2 (operands[0], operands[1]));
+ else
+ {
+ operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+ emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))
+ (clobber (match_scratch:XF 2 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+ (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn_and_split "*fist<mode>2_1"
+ [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+ operands[2]));
+ DONE;
+}
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_with_temp"
+ [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST))]
+ "")
+
+(define_expand "lrint<mode>2"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "")
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_floor"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_FLOOR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+
+ emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_floor_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_expand "floorxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "floordf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_floor (op0, op1));
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "floorsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_floor (op0, op1));
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_floor_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fist<mode>2_floor (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_floor"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_floor_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])]
+ "")
+
+(define_insn "fist<mode>2_floor"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_floor_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ "")
+
+(define_expand "lfloor<mode>2"
+ [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "")
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_ceil"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_CEIL))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+ emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_ceil_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_expand "ceilxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "ceildf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "ceilsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_ceil_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_ceil"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_ceil_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])]
+ "")
+
+(define_insn "fist<mode>2_ceil"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_ceil_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ "")
+
+(define_expand "lceil<mode>2"
+ [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "")
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_trunc"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_TRUNC))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+
+ emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_trunc_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_expand "btruncxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2_trunc (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "btruncdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_trunc (op0, op1));
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "btruncsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_trunc (op0, op1));
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_mask_pm"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_MASK_PM))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_MASK_PM] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
+
+ emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "mask_pm")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_mask_pm_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_MASK_PM))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "mask_pm")
+ (set_attr "mode" "XF")])
+
+(define_expand "nearbyintxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_expand "nearbyintdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "nearbyintsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+
+;; Block operation instructions
+
+(define_insn "cld"
+ [(set (reg:SI DIRFLAG_REG) (const_int 0))]
+ ""
+ "cld"
+ [(set_attr "type" "cld")])
+
+(define_expand "movmemsi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:SI 2 "nonmemory_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))]
+ "! optimize_size || TARGET_INLINE_ALL_STRINGOPS"
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "movmemdi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:DI 2 "nonmemory_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))]
+ "TARGET_64BIT"
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the previous expander.
+
+(define_expand "strmov"
+ [(set (match_dup 4) (match_operand 3 "memory_operand" ""))
+ (set (match_operand 1 "memory_operand" "") (match_dup 4))
+ (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
+
+ /* If .md ever supports :P for Pmode, these can be directly
+ in the pattern above. */
+ operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+ operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
+
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strmov_singleop (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[5], operands[6]));
+ DONE;
+ }
+
+ operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
+})
+
+(define_expand "strmov_singleop"
+ [(parallel [(set (match_operand 1 "memory_operand" "")
+ (match_operand 3 "memory_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 4 "" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operand 5 "" ""))
+ (use (reg:SI DIRFLAG_REG))])]
+ "TARGET_SINGLE_STRINGOP || optimize_size"
+ "")
+
+(define_insn "*strmovdi_rex_1"
+ [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+ (mem:DI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 8)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 8)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsq"
+ [(set_attr "type" "str")
+ (set_attr "mode" "DI")
+ (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_1"
+ [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
+ (mem:SI (match_operand:SI 3 "register_operand" "1")))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 2)
+ (const_int 4)))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_dup 3)
+ (const_int 4)))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "{movsl|movsd}"
+ [(set_attr "type" "str")
+ (set_attr "mode" "SI")
+ (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_rex_1"
+ [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
+ (mem:SI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 4)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 4)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "{movsl|movsd}"
+ [(set_attr "type" "str")
+ (set_attr "mode" "SI")
+ (set_attr "memory" "both")])
+
+(define_insn "*strmovhi_1"
+ [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
+ (mem:HI (match_operand:SI 3 "register_operand" "1")))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 2)
+ (const_int 2)))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_dup 3)
+ (const_int 2)))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strmovhi_rex_1"
+ [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
+ (mem:HI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 2)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 2)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strmovqi_1"
+ [(set (mem:QI (match_operand:SI 2 "register_operand" "0"))
+ (mem:QI (match_operand:SI 3 "register_operand" "1")))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_dup 3)
+ (const_int 1)))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "QI")])
+
+(define_insn "*strmovqi_rex_1"
+ [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
+ (mem:QI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 1)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "QI")])
+
+(define_expand "rep_mov"
+ [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 5 "" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operand 6 "" ""))
+ (set (match_operand 1 "memory_operand" "")
+ (match_operand 3 "memory_operand" ""))
+ (use (match_dup 4))
+ (use (reg:SI DIRFLAG_REG))])]
+ ""
+ "")
+
+(define_insn "*rep_movdi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 3))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT"
+ "{rep\;movsq|rep movsq}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "DI")])
+
+(define_insn "*rep_movsi"
+ [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2")
+ (const_int 2))
+ (match_operand:SI 3 "register_operand" "0")))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (ashift:SI (match_dup 5) (const_int 2))
+ (match_operand:SI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT"
+ "{rep\;movsl|rep movsd}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movsi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 2))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 2))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT"
+ "{rep\;movsl|rep movsd}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi"
+ [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (match_operand:SI 5 "register_operand" "2")))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT"
+ "{rep\;movsb|rep movsb}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_operand:DI 3 "register_operand" "0")
+ (match_operand:DI 5 "register_operand" "2")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT"
+ "{rep\;movsb|rep movsb}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_expand "setmemsi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:SI 1 "nonmemory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "const_int_operand" ""))]
+ ""
+{
+ /* If the value to set is not zero, use the library routine. */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "setmemdi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:DI 1 "nonmemory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "const_int_operand" ""))]
+ "TARGET_64BIT"
+{
+ /* If the value to set is not zero, use the library routine. */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the previous expander.
+
+(define_expand "strset"
+ [(set (match_operand 1 "memory_operand" "")
+ (match_operand 2 "register_operand" ""))
+ (parallel [(set (match_operand 0 "register_operand" "")
+ (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+ operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
+
+ /* If .md ever supports :P for Pmode, this can be directly
+ in the pattern above. */
+ operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+ GEN_INT (GET_MODE_SIZE (GET_MODE
+ (operands[2]))));
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+(define_expand "strset_singleop"
+ [(parallel [(set (match_operand 1 "memory_operand" "")
+ (match_operand 2 "register_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 3 "" ""))
+ (use (reg:SI DIRFLAG_REG))])]
+ "TARGET_SINGLE_STRINGOP || optimize_size"
+ "")
+
+(define_insn "*strsetdi_rex_1"
+ [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:DI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 8)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosq"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*strsetsi_1"
+ [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 1)
+ (const_int 4)))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "{stosl|stosd}"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*strsetsi_rex_1"
+ [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 4)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "{stosl|stosd}"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*strsethi_1"
+ [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "a"))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 1)
+ (const_int 2)))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strsethi_rex_1"
+ [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 2)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strsetqi_1"
+ [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 1)
+ (const_int 1)))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "*strsetqi_rex_1"
+ [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 1)))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_expand "rep_stos"
+ [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 4 "" ""))
+ (set (match_operand 2 "memory_operand" "") (const_int 0))
+ (use (match_operand 3 "register_operand" ""))
+ (use (match_dup 1))
+ (use (reg:SI DIRFLAG_REG))])]
+ ""
+ "")
+
+(define_insn "*rep_stosdi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:DI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT"
+ "{rep\;stosq|rep stosq}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*rep_stossi"
+ [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1")
+ (const_int 2))
+ (match_operand:SI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:SI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT"
+ "{rep\;stosl|rep stosd}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_stossi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 2))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:SI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT"
+ "{rep\;stosl|rep stosd}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_stosqi"
+ [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (match_operand:SI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:QI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI DIRFLAG_REG))]
+ "!TARGET_64BIT"
+ "{rep\;stosb|rep stosb}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rep_stosqi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_operand:DI 3 "register_operand" "0")
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:QI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI DIRFLAG_REG))]
+ "TARGET_64BIT"
+ "{rep\;stosb|rep stosb}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_expand "cmpstrnsi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare:SI (match_operand:BLK 1 "general_operand" "")
+ (match_operand:BLK 2 "general_operand" "")))
+ (use (match_operand 3 "general_operand" ""))
+ (use (match_operand 4 "immediate_operand" ""))]
+ /* APPLE LOCAL x86_64 disable inline expansion for memcmp until 4436760 is fixed */
+ "(! optimize_size || TARGET_INLINE_ALL_STRINGOPS) && !TARGET_64BIT"
+{
+ /* APPLE LOCAL begin 4134111 */
+ rtx addr1, addr2, out, outlow, count, countreg, align, scratch;
+
+ /* Can't use this if the user has appropriated esi or edi. */
+ if (global_regs[4] || global_regs[5])
+ FAIL;
+
+ /* The Darwin expansion is unsafe on volatile objects. */
+ if (TARGET_MACHO
+ && (MEM_VOLATILE_P (operands[1]) || MEM_VOLATILE_P (operands[2])))
+ FAIL;
+
+ /* APPLE LOCAL begin 4134510 */
+ if (TARGET_MACHO)
+ {
+ count = operands[3];
+ if (GET_CODE (count) != CONST_INT || INTVAL (count) > 30)
+ FAIL;
+ }
+ /* APPLE LOCAL end 4134510 */
+
+ if (TARGET_MACHO)
+ scratch = gen_reg_rtx (SImode);
+
+ out = operands[0];
+ if (GET_CODE (out) != REG)
+ out = gen_reg_rtx (SImode);
+
+ addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
+ if (addr1 != XEXP (operands[1], 0))
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ if (addr2 != XEXP (operands[2], 0))
+ operands[2] = replace_equiv_address_nv (operands[2], addr2);
+
+ count = operands[3];
+ countreg = ix86_zero_extend_to_Pmode (count);
+
+ /* %%% Iff we are testing strict equality, we can use known alignment
+ to good advantage. This may be possible with combine, particularly
+ once cc0 is dead. */
+ align = operands[4];
+
+ emit_insn (gen_cld ());
+ if (GET_CODE (count) == CONST_INT)
+ {
+ if (INTVAL (count) == 0)
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ DONE;
+ }
+ if (!TARGET_MACHO)
+ emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
+ operands[1], operands[2]));
+ else
+ emit_insn (gen_darwin_cmpstrnqi_nz_1 (out, addr1, addr2, countreg, align,
+ operands[1], operands[2], scratch));
+ }
+ else
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
+ else
+ emit_insn (gen_cmpsi_1 (countreg, countreg));
+ if (!TARGET_MACHO)
+ emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
+ operands[1], operands[2]));
+ else
+ emit_insn (gen_darwin_cmpstrqi_1 (out, addr1, addr2, countreg, align,
+ operands[1], operands[2], scratch));
+ }
+
+ if (!TARGET_MACHO)
+ {
+ outlow = gen_lowpart (QImode, out);
+ emit_insn (gen_cmpintqi (outlow));
+ emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
+ }
+ /* APPLE LOCAL end 4134111 */
+
+ if (operands[0] != out)
+ emit_move_insn (operands[0], out);
+
+ DONE;
+})
+
+;; Produce a tri-state integer (-1, 0, 1) from condition codes.
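+;; For the flags left by "repz cmpsb" the expansion below typically becomes
+;; something like "seta %al; setb %dl; subb %dl, %al", i.e. 1, 0 or -1
+;; depending on which block compared higher (the peephole2 comment further
+;; down shows the same seta/setb pair).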
+
+(define_expand "cmpintqi"
+ [(set (match_dup 1)
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_dup 2)
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (parallel [(set (match_operand:QI 0 "register_operand" "")
+ (minus:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "operands[1] = gen_reg_rtx (QImode);
+ operands[2] = gen_reg_rtx (QImode);")
+
+;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
+;; zero. Emit extra code to make sure that a zero-length compare is EQ.
+
+(define_expand "cmpstrnqi_nz_1"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand 4 "memory_operand" "")
+ (match_operand 5 "memory_operand" "")))
+ (use (match_operand 2 "register_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "")
+
+(define_insn "*cmpstrnqi_nz_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
+ (use (match_operand:SI 6 "register_operand" "2"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:SI 0 "register_operand" "=S"))
+ (clobber (match_operand:SI 1 "register_operand" "=D"))
+ (clobber (match_operand:SI 2 "register_operand" "=c"))]
+ "!TARGET_64BIT"
+ "repz{\;| }cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "*cmpstrnqi_nz_rex_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
+ (use (match_operand:DI 6 "register_operand" "2"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:DI 0 "register_operand" "=S"))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (match_operand:DI 2 "register_operand" "=c"))]
+ "TARGET_64BIT"
+ "repz{\;| }cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+;; The same, but the count is not known to be nonzero.
+
+(define_expand "cmpstrnqi_1"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand 2 "register_operand" "")
+ (const_int 0))
+ (compare:CC (match_operand 4 "memory_operand" "")
+ (match_operand 5 "memory_operand" ""))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:CC FLAGS_REG))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "")
+
+(define_insn "*cmpstrnqi_1"
+ [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:SI 5 "register_operand" "1")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:CC FLAGS_REG))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:SI 0 "register_operand" "=S"))
+ (clobber (match_operand:SI 1 "register_operand" "=D"))
+ (clobber (match_operand:SI 2 "register_operand" "=c"))]
+ "!TARGET_64BIT"
+ "repz{\;| }cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "*cmpstrnqi_rex_1"
+ [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:DI 5 "register_operand" "1")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:CC FLAGS_REG))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:DI 0 "register_operand" "=S"))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (match_operand:DI 2 "register_operand" "=c"))]
+ "TARGET_64BIT"
+ "repz{\;| }cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+;; APPLE LOCAL begin 4134111
+;; Darwin's memcmp returns the difference of the last 2 bytes compared,
+;; not -1/0/1. Unfortunately we must reload those bytes to get the
+;; result, as they aren't sitting around anywhere. This is still
+;; faster than calling libc, though.
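+;; Concretely, in the insns below "repz cmpsb" leaves %esi and %edi one
+;; past the byte that ended the comparison, so on a mismatch the two bytes
+;; are reloaded from -1(%esi) and -1(%edi) with movzbl and subtracted to
+;; form the return value.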
+
+(define_expand "darwin_cmpstrnqi_nz_1"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (compare:CC (match_operand 5 "memory_operand" "")
+ (match_operand 6 "memory_operand" "")) 0))
+ (use (match_operand 3 "register_operand" ""))
+ (use (match_operand:SI 4 "immediate_operand" ""))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))
+ (clobber (match_operand 7 "register_operand" ""))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_dup 3))])]
+ ""
+ "")
+
+(define_insn "*darwin_cmpstrnqi_nz_1"
+;; APPLE LOCAL 5379188
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (subreg:SI (compare:CC (mem:BLK (match_operand:SI 5 "register_operand" "1"))
+ (mem:BLK (match_operand:SI 6 "register_operand" "2"))) 0))
+ (use (match_operand:SI 8 "register_operand" "3"))
+ (use (match_operand:SI 4 "immediate_operand" "i"))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:SI 1 "register_operand" "=S"))
+ (clobber (match_operand:SI 2 "register_operand" "=D"))
+ (clobber (match_operand:SI 7 "register_operand" "=r"))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 3 "register_operand" "=c"))]
+ "!TARGET_64BIT"
+ "repz{\;| }cmpsb\n\tmov\t$0, %0\n\tje\t0f\n\tmovzbl\t-1(%5), %0\n\tmovzbl\t-1(%6), %8\n\tsubl\t%8,%0\n0:"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_expand "darwin_cmpstrqi_1"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (if_then_else:CC (ne (match_operand 3 "register_operand" "")
+ (const_int 0))
+ (compare:CC (match_operand 5 "memory_operand" "")
+ (match_operand 6 "memory_operand" ""))
+ (const_int 0)) 0))
+ (use (match_operand:SI 4 "immediate_operand" ""))
+ (use (reg:SI DIRFLAG_REG))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))
+ (clobber (match_operand 7 "register_operand" ""))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_dup 3))])]
+ ""
+ "")
+
+(define_insn "*darwin_cmpstrqi_1"
+;; APPLE LOCAL 5379188
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (subreg:SI (if_then_else:CC (ne (match_operand:SI 8 "register_operand" "3")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:SI 5 "register_operand" "1"))
+ (mem:BLK (match_operand:SI 6 "register_operand" "2")))
+ (const_int 0)) 0))
+ (use (match_operand:SI 4 "immediate_operand" "i"))
+ (use (reg:SI DIRFLAG_REG))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 1 "register_operand" "=S"))
+ (clobber (match_operand:SI 2 "register_operand" "=D"))
+ (clobber (match_operand:SI 7 "register_operand" "=r"))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 3 "register_operand" "=c"))]
+ "!TARGET_64BIT"
+ "repz{\;| }cmpsb\n\tmov\t$0, %0\n\tje\t0f\n\tmovzbl\t-1(%5), %0\n\tmovzbl\t-1(%6), %8\n\tsubl\t%8,%0\n0:"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+;; APPLE LOCAL end 4134111
+
+(define_expand "strlensi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+ ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "strlendi"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+ ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "strlenqi_1"
+ [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" ""))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*strlenqi_1"
+ [(set (match_operand:SI 0 "register_operand" "=&c")
+ (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
+ (match_operand:QI 2 "register_operand" "a")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:SI 1 "register_operand" "=D"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "repnz{\;| }scasb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "*strlenqi_rex_1"
+ [(set (match_operand:DI 0 "register_operand" "=&c")
+ (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
+ (match_operand:QI 2 "register_operand" "a")
+ (match_operand:DI 3 "immediate_operand" "i")
+ (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "repnz{\;| }scasb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+;; Peephole optimizations to clean up after cmpstrn*. This should be
+;; handled in combine, but it is not currently up to the task.
+;; When used for their truth value, the cmpstrn* expanders generate
+;; code like this:
+;;
+;; repz cmpsb
+;; seta %al
+;; setb %dl
+;; cmpb %al, %dl
+;; jcc label
+;;
+;; The intermediate three instructions are unnecessary.
+
+;; This one handles cmpstrn*_nz_1...
+(define_peephole2
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+ (mem:BLK (match_operand 5 "register_operand" ""))))
+ (use (match_operand 6 "register_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))])
+ (set (match_operand:QI 7 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_operand:QI 8 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (reg FLAGS_REG)
+ (compare (match_dup 7) (match_dup 8)))
+ ]
+ "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_dup 4))
+ (mem:BLK (match_dup 5))))
+ (use (match_dup 6))
+ (use (match_dup 3))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+;; ...and this one handles cmpstrn*_1.
+(define_peephole2
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand 6 "register_operand" "")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+ (mem:BLK (match_operand 5 "register_operand" "")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:CC FLAGS_REG))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))])
+ (set (match_operand:QI 7 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_operand:QI 8 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (reg FLAGS_REG)
+ (compare (match_dup 7) (match_dup 8)))
+ ]
+ "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_dup 6)
+ (const_int 0))
+ (compare:CC (mem:BLK (match_dup 4))
+ (mem:BLK (match_dup 5)))
+ (const_int 0)))
+ (use (match_dup 3))
+ (use (reg:CC FLAGS_REG))
+ (use (reg:SI DIRFLAG_REG))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+
+
+;; Conditional move instructions.
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "general_operand" "")
+ (match_operand:DI 3 "general_operand" "")))]
+ "TARGET_64BIT"
+ "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+
+(define_insn "x86_movdicc_0_m1_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int -1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "sbb{q}\t%0, %0"
+ ; Since we don't have the proper number of operands for an alu insn,
+ ; fill in all the blanks.
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movdicc_c_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:DI 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_64BIT && TARGET_CMOVE
+ && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "general_operand" "")
+ (match_operand:SI 3 "general_operand" "")))]
+ ""
+ "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+
+;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
+;; the register first winds up with `sbbl $0,reg', which is also weird.
+;; So just document what we're doing explicitly.
+
+(define_insn "x86_movsicc_0_m1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int -1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{l}\t%0, %0"
+ ; Since we don't have the proper number of operands for an alu insn,
+ ; fill in all the blanks.
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movsicc_noc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:SI 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_CMOVE
+ && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
+(define_expand "movhicc"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (if_then_else:HI (match_operand 1 "comparison_operator" "")
+ (match_operand:HI 2 "general_operand" "")
+ (match_operand:HI 3 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+
+(define_insn "*movhicc_noc"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:HI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:HI 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_CMOVE
+ && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "HI")])
+
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (if_then_else:QI (match_operand 1 "comparison_operator" "")
+ (match_operand:QI 2 "general_operand" "")
+ (match_operand:QI 3 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+
+(define_insn_and_split "*movqicc_noc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:QI 2 "register_operand" "r,0")
+ (match_operand:QI 3 "register_operand" "0,r")))]
+ "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 2)
+ (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "register_operand" "")))]
+ "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
+ "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+
+(define_insn "*movsfcc_1_387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
+ (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "TARGET_80387 && TARGET_CMOVE
+ && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+ (set_attr "mode" "SF,SF,SI,SI")])
+
+(define_expand "movdfcc"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (match_operand 1 "comparison_operator" "")
+ (match_operand:DF 2 "register_operand" "")
+ (match_operand:DF 3 "register_operand" "")))]
+ "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+
+(define_insn "*movdfcc_1"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ #
+ #"
+ [(set_attr "type" "fcmov,fcmov,multi,multi")
+ (set_attr "mode" "DF")])
+
+(define_insn "*movdfcc_1_rex64"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+ (set_attr "mode" "DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (match_operand:DF 3 "nonimmediate_operand" "")))]
+ "!TARGET_64BIT && reload_completed"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 5)
+ (match_dup 7)))
+ (set (match_dup 3)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 6)
+ (match_dup 8)))]
+ "split_di (operands+2, 1, operands+5, operands+6);
+ split_di (operands+3, 1, operands+7, operands+8);
+ split_di (operands, 1, operands+2, operands+3);")
+
+(define_expand "movxfcc"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (if_then_else:XF (match_operand 1 "comparison_operator" "")
+ (match_operand:XF 2 "register_operand" "")
+ (match_operand:XF 3 "register_operand" "")))]
+ "TARGET_80387 && TARGET_CMOVE"
+ "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+
+(define_insn "*movxfcc_1"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:XF 2 "register_operand" "f,0")
+ (match_operand:XF 3 "register_operand" "0,f")))]
+ "TARGET_80387 && TARGET_CMOVE"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov")
+ (set_attr "mode" "XF")])
+
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
+;; are undefined in this condition, we're certain this is correct.
+
+(define_insn "sminsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH"
+ "minss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "smaxsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH"
+ "maxss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "smindf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "minsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "smaxdf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "maxsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*ieee_sminsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE_MATH"
+ "minss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smaxsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE_MATH"
+ "maxss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smindf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "minsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "*ieee_smaxdf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "maxsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+;; Make two stack loads independent:
+;; fld aa fld aa
+;; fld %st(0) -> fld bb
+;; fmul bb fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operand 1 "fp_register_operand" ""))
+ (set (match_dup 0)
+ (match_operator 2 "binary_fp_operator"
+ [(match_dup 0)
+ (match_operand 3 "memory_operand" "")]))]
+ "REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (match_dup 4))]
+
+ ;; The % modifier is not operational anymore in peephole2's, so we have to
+ ;; swap the operands manually in the case of addition and multiplication.
+ "if (COMMUTATIVE_ARITH_P (operands[2]))
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ operands[0], operands[1]);
+ else
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ operands[1], operands[0]);")
+
+;; Conditional addition patterns
+(define_expand "addqicc"
+ [(match_operand:QI 0 "register_operand" "")
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 3 "const_int_operand" "")]
+ ""
+ "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+
+(define_expand "addhicc"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:HI 2 "register_operand" "")
+ (match_operand:HI 3 "const_int_operand" "")]
+ ""
+ "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+
+(define_expand "addsicc"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ ""
+ "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+
+(define_expand "adddicc"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "register_operand" "")
+ (match_operand:DI 3 "const_int_operand" "")]
+ "TARGET_64BIT"
+ "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+
+
+;; Misc patterns (?)
+
+;; This pattern exists to put a dependency on all ebp-based memory accesses.
+;; Otherwise there will be nothing to keep
+;;
+;; [(set (reg ebp) (reg esp))]
+;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
+;; (clobber (eflags)]
+;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
+;;
+;; in proper program order.
+(define_insn "pro_epilogue_adjust_stack_1"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "immediate_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOV:
+ return "mov{l}\t{%1, %0|%0, %1}";
+
+ case TYPE_ALU:
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{l}\t{%a2, %0|%0, %a2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "0")
+ (const_string "alu")
+ (match_operand:SI 2 "const0_operand" "")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set_attr "mode" "SI")])
+
+(define_insn "pro_epilogue_adjust_stack_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOV:
+ return "mov{q}\t{%1, %0|%0, %1}";
+
+ case TYPE_ALU:
+ if (GET_CODE (operands[2]) == CONST_INT
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{q}\t{%a2, %0|%0, %a2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "0")
+ (const_string "alu")
+ (match_operand:DI 2 "const0_operand" "")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set_attr "mode" "DI")])
+
+(define_insn "pro_epilogue_adjust_stack_rex64_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand:DI 3 "immediate_operand" "i,i")))
+ (use (match_operand:DI 2 "register_operand" "r,r"))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ return "add{q}\t{%2, %0|%0, %2}";
+
+ case TYPE_LEA:
+ operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]);
+ return "lea{q}\t{%a2, %0|%0, %a2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "alu,lea")
+ (set_attr "mode" "DI")])
+
+(define_expand "allocate_stack_worker"
+ [(match_operand:SI 0 "register_operand" "")]
+ "TARGET_STACK_PROBE"
+{
+ if (reload_completed)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0]));
+ else
+ emit_insn (gen_allocate_stack_worker_postreload (operands[0]));
+ }
+ else
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_allocate_stack_worker_rex64 (operands[0]));
+ else
+ emit_insn (gen_allocate_stack_worker_1 (operands[0]));
+ }
+ DONE;
+})
+
+(define_insn "allocate_stack_worker_1"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")]
+ UNSPECV_STACK_PROBE)
+ (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0)))
+ (clobber (match_scratch:SI 1 "=0"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_STACK_PROBE"
+ "call\t__alloca"
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+(define_expand "allocate_stack_worker_postreload"
+ [(parallel [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")]
+ UNSPECV_STACK_PROBE)
+ (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0)))
+ (clobber (match_dup 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "allocate_stack_worker_rex64"
+ [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")]
+ UNSPECV_STACK_PROBE)
+ (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0)))
+ (clobber (match_scratch:DI 1 "=0"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_STACK_PROBE"
+ "call\t__alloca"
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+(define_expand "allocate_stack_worker_rex64_postreload"
+ [(parallel [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")]
+ UNSPECV_STACK_PROBE)
+ (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0)))
+ (clobber (match_dup 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_expand "allocate_stack"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (reg:SI SP_REG)
+ (match_operand:SI 1 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (reg:SI SP_REG)
+ (minus:SI (reg:SI SP_REG) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_STACK_PROBE"
+{
+#ifdef CHECK_STACK_LIMIT
+ if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
+ emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ operands[1]));
+ else
+#endif
+ emit_insn (gen_allocate_stack_worker (copy_to_mode_reg (SImode,
+ operands[1])));
+
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+})
+
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "!TARGET_64BIT && flag_pic"
+{
+ if (TARGET_MACHO)
+ {
+ rtx xops[3];
+ rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+ rtx label_rtx = gen_label_rtx ();
+ emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+ xops[0] = xops[1] = picreg;
+ xops[2] = gen_rtx_CONST (SImode,
+ gen_rtx_MINUS (SImode,
+ gen_rtx_LABEL_REF (SImode, label_rtx),
+ gen_rtx_SYMBOL_REF (SImode, GOT_SYMBOL_NAME)));
+ ix86_expand_binary_operator (MINUS, SImode, xops);
+ }
+ else
+ emit_insn (gen_set_got (pic_offset_table_rtx));
+ DONE;
+})
+
+;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
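+;; For example (illustrative only; the constant and register are arbitrary),
+;; a 16-bit add such as:
+;; addw $123, %ax		(carries the 0x66 operand-size prefix)
+;; is performed instead as:
+;; addl $123, %eax		(same low 16 bits, no prefix)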
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "promotable_binary_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "aligned_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && ((GET_MODE (operands[0]) == HImode
+ && ((!optimize_size && !TARGET_FAST_PREFIX)
+ /* ??? next two lines just !satisfies_constraint_K (...) */
+ || GET_CODE (operands[2]) != CONST_INT
+ || satisfies_constraint_K (operands[2])))
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode || optimize_size)))"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ if (GET_CODE (operands[3]) != ASHIFT)
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ PUT_MODE (operands[3], SImode);")
+
+; Promote the QImode tests, as i386 has encoding of the AND
+; instruction with 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(and (match_operand 3 "aligned_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_int 0)]))
+ (set (match_operand 1 "register_operand" "")
+ (and (match_dup 3) (match_dup 4)))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)
+ && ! optimize_size
+ && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
+ || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (and:SI (match_dup 3) (match_dup 4)))])]
+{
+ operands[4]
+ = gen_int_mode (INTVAL (operands[4])
+ & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[3] = gen_lowpart (SImode, operands[3]);
+})
+
+; Don't promote the QImode tests, as i386 doesn't have encoding of
+; the TEST instruction with 32-bit sign-extended immediate and thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand:HI 2 "aligned_operand" "")
+ (match_operand:HI 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)
+ && ! TARGET_FAST_PREFIX
+ && ! optimize_size"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+ (const_int 0)]))]
+{
+ operands[3]
+ = gen_int_mode (INTVAL (operands[3])
+ & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (neg (match_operand 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode || optimize_size)))"
+ [(parallel [(set (match_dup 0)
+ (neg:SI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (not (match_operand 1 "register_operand" "")))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode || optimize_size)))"
+ [(set (match_dup 0)
+ (not:SI (match_dup 1)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand 2 "register_operand" "")
+ (match_operand 3 "register_operand" "")))]
+ "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode || optimize_size)))"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);")
+
+
+;; RTL Peephole optimizations, run before sched2.  These primarily look to
+;; transform a complex memory operation into two memory-to-register operations.
+
+;; Don't push memory operands
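+;; For example (illustrative; the scratch register is arbitrary), turn:
+;; pushl 8(%ebp)
+;; into:
+;; movl 8(%ebp), %ecx
+;; pushl %ecx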
+(define_peephole2
+ [(set (match_operand:SI 0 "push_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (match_scratch:SI 2 "r")]
+ "!optimize_size && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))
+ (match_scratch:DI 2 "r")]
+ "!optimize_size && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; We need to handle SFmode only, because DFmode and XFmode are split into
+;; SImode pushes.
+(define_peephole2
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))
+ (match_scratch:SF 2 "r")]
+ "!optimize_size && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "push_operand" "")
+ (match_operand:HI 1 "memory_operand" ""))
+ (match_scratch:HI 2 "r")]
+ "!optimize_size && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:QI 0 "push_operand" "")
+ (match_operand:QI 1 "memory_operand" ""))
+ (match_scratch:QI 2 "q")]
+ "!optimize_size && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; Don't move an immediate directly to memory when the instruction
+;; gets too big.
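+;; For example (illustrative; the scratch register is arbitrary), when the
+;; flags are dead, turn:
+;; movl $0, 16(%esp)
+;; into:
+;; xorl %ecx, %ecx
+;; movl %ecx, 16(%esp)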
+(define_peephole2
+ [(match_scratch:SI 1 "r")
+ (set (match_operand:SI 0 "memory_operand" "")
+ (const_int 0))]
+ "! optimize_size
+ && ! TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cost->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 1) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_peephole2
+ [(match_scratch:HI 1 "r")
+ (set (match_operand:HI 0 "memory_operand" "")
+ (const_int 0))]
+ "! optimize_size
+ && ! TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cost->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 2) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+ [(match_scratch:QI 1 "q")
+ (set (match_operand:QI 0 "memory_operand" "")
+ (const_int 0))]
+ "! optimize_size
+ && ! TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cost->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 2) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (set (match_operand:SI 0 "memory_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "! optimize_size
+ && get_attr_length (insn) >= ix86_cost->large_insn
+ && TARGET_SPLIT_LONG_MOVES"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(match_scratch:HI 2 "r")
+ (set (match_operand:HI 0 "memory_operand" "")
+ (match_operand:HI 1 "immediate_operand" ""))]
+ "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
+ && TARGET_SPLIT_LONG_MOVES"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(match_scratch:QI 2 "q")
+ (set (match_operand:QI 0 "memory_operand" "")
+ (match_operand:QI 1 "immediate_operand" ""))]
+ "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
+ && TARGET_SPLIT_LONG_MOVES"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; Don't compare memory with zero, load and use a test instead.
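+;; For example (illustrative; the scratch register is arbitrary), turn:
+;; cmpl $0, 4(%esp)
+;; into:
+;; movl 4(%esp), %ecx
+;; testl %ecx, %ecx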
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(match_operand:SI 2 "memory_operand" "")
+ (const_int 0)]))
+ (match_scratch:SI 3 "r")]
+ "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
+ "")
+
+;; NOT is not pairable on Pentium, while XOR is, though it is one byte longer.
+;; Don't split NOTs with a displacement operand, because the resulting XOR
+;; will not be pairable anyway.
+;;
+;; On AMD K6, NOT with a memory operand that cannot be represented using a
+;; modRM byte alone is vector decoded.  The XOR replacement is long decoded,
+;; so this split helps here as well.
+;;
+;; Note: Can't do this as a regular split because we can't get proper
+;; lifetime information then.
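+;;
+;; For example (illustrative), when the flags are dead, turn:
+;; notl %eax
+;; into the pairable:
+;; xorl $-1, %eax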
+
+(define_peephole2
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "!optimize_size
+ && peep2_regno_dead_p (0, FLAGS_REG)
+ && ((TARGET_PENTIUM
+ && (GET_CODE (operands[0]) != MEM
+ || !memory_displacement_operand (operands[0], SImode)))
+ || (TARGET_K6 && long_memory_operand (operands[0], SImode)))"
+ [(parallel [(set (match_dup 0)
+ (xor:SI (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "!optimize_size
+ && peep2_regno_dead_p (0, FLAGS_REG)
+ && ((TARGET_PENTIUM
+ && (GET_CODE (operands[0]) != MEM
+ || !memory_displacement_operand (operands[0], HImode)))
+ || (TARGET_K6 && long_memory_operand (operands[0], HImode)))"
+ [(parallel [(set (match_dup 0)
+ (xor:HI (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "!optimize_size
+ && peep2_regno_dead_p (0, FLAGS_REG)
+ && ((TARGET_PENTIUM
+ && (GET_CODE (operands[0]) != MEM
+ || !memory_displacement_operand (operands[0], QImode)))
+ || (TARGET_K6 && long_memory_operand (operands[0], QImode)))"
+ [(parallel [(set (match_dup 0)
+ (xor:QI (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; Non-pairable "test imm, reg" instructions can be translated to
+;; "and imm, reg" if reg dies.  The "and" form is also shorter (a one-byte
+;; opcode instead of two, and it has a short form for byte operands),
+;; so do it for other CPUs as well. Given that the value was dead,
+;; this should not create any new dependencies. Pass on the sub-word
+;; versions if we're concerned about partial register stalls.
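+;; For example (illustrative), if %ecx dies after the test, turn:
+;; testl $15, %ecx
+;; into:
+;; andl $15, %ecx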
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" ""))
+ (const_int 0)]))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (true_regnum (operands[2]) != 0
+ || satisfies_constraint_K (operands[3]))
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+ (const_int 0)]))
+ (set (match_dup 2)
+ (and:SI (match_dup 2) (match_dup 3)))])]
+ "")
+
+;; We don't need to handle the HImode case, because it will be promoted to
+;; SImode when ! TARGET_PARTIAL_REG_STALL.
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:QI (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 3 "immediate_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL
+ && ix86_match_ccmode (insn, CCNOmode)
+ && true_regnum (operands[2]) != 0
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+ (const_int 0)]))
+ (set (match_dup 2)
+ (and:QI (match_dup 2) (match_dup 3)))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:SI
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL
+ && ix86_match_ccmode (insn, CCNOmode)
+ && true_regnum (operands[2]) != 0
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 1
+ [(and:SI
+ (zero_extract:SI
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 3))
+ (const_int 0)]))
+ (set (zero_extract:SI (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 3)))])]
+ "")
+
+;; Don't do logical operations with memory inputs.
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_dup 0)
+ (match_operand:SI 1 "memory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "! optimize_size && ! TARGET_READ_MODIFY"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_operand:SI 1 "memory_operand" "")
+ (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "! optimize_size && ! TARGET_READ_MODIFY"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+; Don't do logical operations with memory outputs.
+;
+; These two don't make sense for PPro/PII -- we're expanding a 4-uop
+; instruction into two 1-uop insns plus a 2-uop insn.  That last one has
+; the same decoder scheduling characteristics as the original.
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_dup 0)
+ (match_operand:SI 1 "nonmemory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "! optimize_size && ! TARGET_READ_MODIFY_WRITE"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel [(set (match_dup 2)
+ (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_operand:SI 1 "nonmemory_operand" "")
+ (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "! optimize_size && ! TARGET_READ_MODIFY_WRITE"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel [(set (match_dup 2)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; Attempt to always use XOR for zeroing registers.
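+;; For example (illustrative), when the flags are dead, turn:
+;; movl $0, %eax
+;; into the shorter:
+;; xorl %eax, %eax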
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "const0_operand" ""))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (! TARGET_USE_MOV0 || optimize_size)
+ && GENERAL_REG_P (operands[0])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[0] = gen_lowpart (word_mode, operands[0]);
+})
+
+(define_peephole2
+ [(set (strict_low_part (match_operand 0 "register_operand" ""))
+ (const_int 0))]
+ "(GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode)
+ && (! TARGET_USE_MOV0 || optimize_size)
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg.
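+;; For example (illustrative), when the flags are dead, turn:
+;; movl $-1, %eax		(5 bytes)
+;; into:
+;; orl $-1, %eax		(3 bytes, sign-extended 8-bit immediate)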
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (const_int -1))]
+ "(GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == SImode
+ || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
+ && (optimize_size || TARGET_PENTIUM)
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (const_int -1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode,
+ operands[0]);")
+
+;; Attempt to convert simple leas to adds. These can be created by
+;; move expanders.
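+;; For example (illustrative), when the flags are dead, turn:
+;; leal 4(%eax), %eax
+;; into:
+;; addl $4, %eax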
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_operand:SI 1 "nonmemory_operand" "")))]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")) 0))]
+ "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])"
+ [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_dup 0)
+ (match_operand:DI 1 "x86_64_general_operand" "")))]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))]
+ "exact_log2 (INTVAL (operands[1])) >= 0
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_dup 0)
+ (match_operand:DI 1 "const_int_operand" "")))]
+ "exact_log2 (INTVAL (operands[1])) >= 0
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")) 0))]
+ "exact_log2 (INTVAL (operands[2])) >= 0
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+;; The ESP adjustments can be done by the push and pop instructions.  The
+;; resulting code is shorter, since push is only 1 byte while add imm, %esp is
+;; 3 bytes.  On many CPUs it is also faster, since special hardware is present
+;; to avoid esp dependencies.
+
+;; While some of these conversions may be done using splitters, we use
+;; peepholes in order to allow the combine_stack_adjustments pass to see
+;; non-obfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we
+;; have two choices:
+;; - use a scratch register and clobber it in order to avoid dependencies
+;; - use an already-live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most
+;; likely result in fewer dependencies.  At the point of the esp adjustments
+;; it is very likely that call-clobbered registers are dead.  We may later
+;; want to use the base pointer as an alternative when no register is
+;; available.
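+;;
+;; For example (illustrative; the clobbered register is arbitrary), turn:
+;; subl $4, %esp
+;; into:
+;; pushl %ecx
+;; where %ecx is otherwise dead at this point.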
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_size || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_size || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
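+;; For example (illustrative; the scratch register is arbitrary), turn:
+;; addl $4, %esp
+;; into:
+;; popl %ecx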
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_ADD_ESP_4"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))])]
+ "")
+
+;; The two-pops case is tricky, since pop creates a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (match_scratch:SI 1 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_ADD_ESP_8"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+;; The two-pops case is tricky, since pop creates a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (match_scratch:SI 1 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
+ (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_size"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
+ (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+;; Convert compares with 1 to shorter inc/dec operations when CF is not
+;; required and the register dies.  Similarly, a compare with 128 becomes a
+;; plus of -128, which fits in a sign-extended 8-bit immediate.
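+;; For example (illustrative), if %eax dies and CF is unused, turn:
+;; cmpl $1, %eax
+;; into:
+;; decl %eax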
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(match_operand 2 "register_operand" "")
+ (match_operand 3 "const_int_operand" "")]))]
+ "(INTVAL (operands[3]) == -1
+ || INTVAL (operands[3]) == 1
+ || INTVAL (operands[3]) == 128)
+ && ix86_match_ccmode (insn, CCGCmode)
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
+ (clobber (match_dup 2))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_size || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_size || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_ADD_ESP_4"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))])]
+ "")
+
+;; The two-pops case is tricky, since pop creates a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (match_scratch:DI 1 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size || !TARGET_ADD_ESP_8"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_size"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+;; The two-pops case is tricky, since pop creates a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (match_scratch:DI 1 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
+ (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_size"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
+ (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+;; Convert imul by three, five and nine into lea
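+;; For example (illustrative), turn:
+;; imull $5, %eax, %eax
+;; into:
+;; leal (%eax,%eax,4), %eax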
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9"
+ [(set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!optimize_size
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 0) (match_dup 2))
+ (match_dup 0)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0)
+ (plus:DI (mult:DI (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT
+ && !optimize_size
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0)
+ (plus:DI (mult:DI (match_dup 0) (match_dup 2))
+ (match_dup 0)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+;; Imul $32bit_imm, mem, reg is vector decoded, while
+;; imul $32bit_imm, reg, reg is directly decoded.
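+;; For example (illustrative; the scratch register is arbitrary), turn:
+;; imull $100000, 4(%esp), %eax
+;; into:
+;; movl 4(%esp), %edx
+;; imull $100000, %edx, %eax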
+(define_peephole2
+ [(match_scratch:DI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "memory_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])]
+"")
+
+;; imul $8/16bit_imm, regmem, reg is vector decoded.
+;; Convert it into imul reg, reg.
+;; It would be better to force the assembler to encode the instruction using
+;; a long immediate, but there is apparently no way to do so.
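+;; A sketch of the rewrite (register choices are illustrative only):
+;;   imulq $11, %rdi, %rax
+;; becomes
+;;   movq  %rdi, %rax
+;;   movq  $11, %rcx
+;;   imulq %rcx, %rax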
+(define_peephole2
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:DI 3 "r")]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ && satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
+(define_peephole2
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:SI 3 "r")]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ && satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:HI 3 "r")]
+ "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
+;; After splitting up read-modify operations, array accesses with memory
+;; operands might end up in the form:
+;; sall $2, %eax
+;; movl 4(%esp), %edx
+;; addl %edx, %eax
+;; instead of pre-splitting:
+;; sall $2, %eax
+;; addl 4(%esp), %eax
+;; Turn it into:
+;; movl 4(%esp), %edx
+;; leal (%edx,%eax,4), %eax
+
+(define_peephole2
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand 3 "register_operand")
+ (match_operand 4 "x86_64_general_operand" ""))
+ (parallel [(set (match_operand 5 "register_operand" "")
+ (plus (match_operand 6 "register_operand" "")
+ (match_operand 7 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3
+ /* Validate MODE for lea. */
+ && ((!TARGET_PARTIAL_REG_STALL
+ && (GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode))
+ || GET_MODE (operands[0]) == SImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+ /* We reorder load and the shift. */
+ && !rtx_equal_p (operands[1], operands[3])
+ && !reg_overlap_mentioned_p (operands[0], operands[4])
+ /* Last PLUS must consist of operand 0 and 3. */
+ && !rtx_equal_p (operands[0], operands[3])
+ && (rtx_equal_p (operands[3], operands[6])
+ || rtx_equal_p (operands[3], operands[7]))
+ && (rtx_equal_p (operands[0], operands[6])
+ || rtx_equal_p (operands[0], operands[7]))
+ /* The intermediate operand 0 must die or be same as output. */
+ && (rtx_equal_p (operands[0], operands[5])
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (match_dup 1))]
+{
+ enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode;
+ int scale = 1 << INTVAL (operands[2]);
+ rtx index = gen_lowpart (Pmode, operands[1]);
+ rtx base = gen_lowpart (Pmode, operands[3]);
+ rtx dest = gen_lowpart (mode, operands[5]);
+
+ operands[1] = gen_rtx_PLUS (Pmode, base,
+ gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+ if (mode != Pmode)
+ operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+ operands[0] = dest;
+})
+
+;; The call-value patterns come last so that the wildcard operand does not
+;; disrupt insn-recog's switch tables.
+
+(define_insn "*call_value_pop_0"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_pop_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "!TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ {
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+ }
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%A1";
+ else
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))]
+ "TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
+ (match_operand:SI 2 "" "")))]
+ "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a"))
+ (match_operand:SI 2 "" "")))]
+ "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "jmp\t%P1";
+ return "jmp\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+ (match_operand:DI 2 "" "")))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "" "")))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t%P1"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64_v"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (reg:DI 40))
+ (match_operand:DI 1 "" "")))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t*%%r11"
+ [(set_attr "type" "callv")])
+
+;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
+;; That, however, is usually mapped by the OS to SIGSEGV, which is often
+;; caught for use by garbage collectors and the like. Using an insn that
+;; maps to SIGILL makes it more likely the program will rightfully die.
+;; Keeping with tradition, "6" is in honor of #UD.
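+;; The two bytes 0x0f 0x0b are the ud2 instruction; emitting them with
+;; ASM_SHORT as the 16-bit value 0x0b0f relies on the target being
+;; little-endian.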
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 6))]
+ ""
+ { return ASM_SHORT "0x0b0f"; }
+ [(set_attr "length" "2")])
+
+(define_expand "sse_prologue_save"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(reg:DI 21)
+ (reg:DI 22)
+ (reg:DI 23)
+ (reg:DI 24)
+ (reg:DI 25)
+ (reg:DI 26)
+ (reg:DI 27)
+ (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "immediate_operand" ""))
+ (use (label_ref:DI (match_operand 3 "" "")))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*sse_prologue_save_insn"
+ [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+ (match_operand:DI 4 "const_int_operand" "n")))
+ (unspec:BLK [(reg:DI 21)
+ (reg:DI 22)
+ (reg:DI 23)
+ (reg:DI 24)
+ (reg:DI 25)
+ (reg:DI 26)
+ (reg:DI 27)
+ (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (use (match_operand:DI 1 "register_operand" "r"))
+ (use (match_operand:DI 2 "const_int_operand" "i"))
+ (use (label_ref:DI (match_operand 3 "" "X")))]
+ "TARGET_64BIT
+ && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128
+ && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
+ "*
+{
+ int i;
+ operands[0] = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, operands[0], operands[4]));
+ output_asm_insn (\"jmp\\t%A1\", operands);
+ for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
+ {
+ operands[4] = adjust_address (operands[0], DImode, i*16);
+ operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
+ PUT_MODE (operands[4], TImode);
+ if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
+ output_asm_insn (\"rex\", operands);
+ output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
+ }
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (operands[3]));
+ RET;
+}
+ "
+ [(set_attr "type" "other")
+ (set_attr "length_immediate" "0")
+ (set_attr "length_address" "0")
+ (set_attr "length" "135")
+ (set_attr "memory" "store")
+ (set_attr "modrm" "0")
+ (set_attr "mode" "DI")])
+
+(define_expand "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+ int rw = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+
+ gcc_assert (rw == 0 || rw == 1);
+ gcc_assert (locality >= 0 && locality <= 3);
+ gcc_assert (GET_MODE (operands[0]) == Pmode
+ || GET_MODE (operands[0]) == VOIDmode);
+
+ /* Use the 3dNOW prefetch when we are asking for a write prefetch that the
+ SSE counterpart does not support, or when the SSE prefetch is not
+ available at all (K6 machines).  Otherwise use the SSE prefetch, since it
+ allows the locality to be specified.  */
+ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+ operands[2] = GEN_INT (3);
+ else
+ operands[1] = const0_rtx;
+})
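+;; For instance (illustrative mapping), __builtin_prefetch (p, 1, 1) reaches
+;; the expander above with rw = 1 and locality = 1; on an SSE-only target the
+;; rw hint is dropped and prefetcht2 is emitted, while a 3dNOW!-only target
+;; keeps the write hint and emits prefetchw.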
+
+(define_insn "*prefetch_sse"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE && !TARGET_64BIT"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ gcc_assert (locality >= 0 && locality <= 3);
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_sse_rex"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE && TARGET_64BIT"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ gcc_assert (locality >= 0 && locality <= 3);
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 3))]
+ "TARGET_3DNOW && !TARGET_64BIT"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow_rex"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 3))]
+ "TARGET_3DNOW && TARGET_64BIT"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
+
+(define_expand "stack_protect_set"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+{
+#ifdef TARGET_THREAD_SSP_OFFSET
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_tls_protect_set_di (operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+ else
+ emit_insn (gen_stack_tls_protect_set_si (operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_set_di (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+#endif
+ DONE;
+})
+
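+;; Note that each stack_protect_set* template below ends by xor-clearing its
+;; scratch register, so the copied guard value is not left live in a register
+;; afterwards.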
+(define_insn "stack_protect_set_si"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_protect_set_di"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_si"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_di"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ {
+ /* The kernel uses a different segment register for performance reasons;
+ that way a system call does not have to trash the userspace segment
+ register, which would be expensive.  */
+ if (ix86_cmodel != CM_KERNEL)
+ return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+ else
+ return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+ }
+ [(set_attr "type" "multi")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ ix86_compare_emitted = flags;
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_tls_protect_test_di (flags, operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+ else
+ emit_insn (gen_stack_tls_protect_test_si (flags, operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1]));
+#endif
+ emit_jump_insn (gen_beq (operands[2]));
+ DONE;
+})
+
+(define_insn "stack_protect_test_si"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ ""
+ "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_protect_test_di"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_64BIT"
+ "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_si"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPEC_SP_TLS_TEST))
+ (clobber (match_scratch:SI 3 "=r"))]
+ ""
+ "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_di"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "const_int_operand" "i")]
+ UNSPEC_SP_TLS_TEST))
+ (clobber (match_scratch:DI 3 "=r"))]
+ "TARGET_64BIT"
+ {
+ /* The kernel uses a different segment register for performance reasons;
+ that way a system call does not have to trash the userspace segment
+ register, which would be expensive.  */
+ if (ix86_cmodel != CM_KERNEL)
+ return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}";
+ else
+ return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}";
+ }
+ [(set_attr "type" "multi")])
+
+; APPLE LOCAL begin 3399553
+
+; Expand the builtin FLT_ROUNDS by reading the x87 FPSR rounding bits.
+
+(define_expand "flt_rounds"
+ [(set (match_operand 0 "nonimmediate_operand" "")
+ (unspec:SI [(reg:CCFP FPSR_REG)] UNSPEC_FLT_ROUNDS))]
+ ""
+ "
+ {
+ ix86_expand_flt_rounds (operands[0]);
+ DONE;
+ }
+ "
+)
+; APPLE LOCAL end 3399553
+; APPLE LOCAL begin mainline
+(include "mmx.md")
+(include "sse.md")
+; APPLE LOCAL end mainline
+(include "sync.md")
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_mode_macro CRC32MODE [QI HI SI])
+(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")])
+(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")])
+
+(define_insn "sse4_2_crc32<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")]
+ UNSPEC_CRC32))]
+ "TARGET_SSE4_2"
+ "crc32<crc32modesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse4_2_crc32di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")]
+ UNSPEC_CRC32))]
+ "TARGET_SSE4_2 && TARGET_64BIT"
+ "crc32q\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 5612787 mainline sse4
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.opt b/gcc-4.2.1-5666.3/gcc/config/i386/i386.opt
new file mode 100644
index 000000000..578ea36e7
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.opt
@@ -0,0 +1,262 @@
+; Options for the IA-32 and AMD64 ports of the compiler.
+
+; Copyright (C) 2005 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 2, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING. If not, write to the Free
+; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+; 02110-1301, USA.
+
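+; A reading aid (not part of the option format itself): each record below is
+; the option name, a line of properties, and its --help text.  For instance
+; the m3dnow record
+;   m3dnow
+;   Target Report Mask(3DNOW)
+;   Support 3DNow! built-in functions
+; declares -m3dnow/-mno-3dnow and maps it to the MASK_3DNOW bit in
+; target_flags, tested in the compiler as TARGET_3DNOW.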
+m128bit-long-double
+Target RejectNegative Report Mask(128BIT_LONG_DOUBLE)
+sizeof(long double) is 16
+
+m32
+Target RejectNegative Negative(m64) Report InverseMask(64BIT)
+Generate 32bit i386 code
+
+m386
+Target RejectNegative Undocumented
+;; Deprecated
+
+m3dnow
+Target Report Mask(3DNOW)
+Support 3DNow! built-in functions
+
+m486
+Target RejectNegative Undocumented
+;; Deprecated
+
+m64
+Target RejectNegative Negative(m32) Report Mask(64BIT)
+Generate 64bit x86-64 code
+
+m80387
+Target Report Mask(80387)
+Use hardware fp
+
+m96bit-long-double
+Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE)
+sizeof(long double) is 12
+
+maccumulate-outgoing-args
+Target Report Mask(ACCUMULATE_OUTGOING_ARGS)
+Reserve space for outgoing arguments in the function prologue
+
+malign-double
+Target Report Mask(ALIGN_DOUBLE)
+Align some doubles on dword boundary
+
+malign-functions=
+Target RejectNegative Joined Var(ix86_align_funcs_string)
+Function starts are aligned to this power of 2
+
+malign-jumps=
+Target RejectNegative Joined Var(ix86_align_jumps_string)
+Jump targets are aligned to this power of 2
+
+malign-loops=
+Target RejectNegative Joined Var(ix86_align_loops_string)
+Loop code aligned to this power of 2
+
+malign-stringops
+Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS)
+Align destination of the string operations
+
+march=
+Target RejectNegative Joined Var(ix86_arch_string)
+Generate code for given CPU
+
+masm=
+Target RejectNegative Joined Var(ix86_asm_string)
+Use given assembler dialect
+
+mbranch-cost=
+Target RejectNegative Joined Var(ix86_branch_cost_string)
+Branches are this expensive (1-5, arbitrary units)
+
+mlarge-data-threshold=
+Target RejectNegative Joined Var(ix86_section_threshold_string)
+Data greater than given threshold will go into .ldata section in x86-64 medium model
+
+mcmodel=
+Target RejectNegative Joined Var(ix86_cmodel_string)
+Use given x86-64 code model
+
+mdebug-addr
+Target RejectNegative Var(TARGET_DEBUG_ADDR) Undocumented
+
+mdebug-arg
+Target RejectNegative Var(TARGET_DEBUG_ARG) Undocumented
+
+mfancy-math-387
+Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387)
+Generate sin, cos, sqrt for FPU
+
+mfp-ret-in-387
+Target Report Mask(FLOAT_RETURNS)
+Return values of functions in FPU registers
+
+mfpmath=
+Target RejectNegative Joined Var(ix86_fpmath_string)
+Generate floating point mathematics using given instruction set
+
+mhard-float
+Target RejectNegative Mask(80387) MaskExists
+Use hardware fp
+
+mieee-fp
+Target Report Mask(IEEE_FP)
+Use IEEE math for fp comparisons
+
+minline-all-stringops
+Target Report Mask(INLINE_ALL_STRINGOPS)
+Inline all known string operations
+
+mintel-syntax
+Target Undocumented
+;; Deprecated
+
+mmmx
+Target Report Mask(MMX)
+Support MMX built-in functions
+
+mms-bitfields
+Target Report Mask(MS_BITFIELD_LAYOUT)
+Use native (MS) bitfield layout
+
+mno-align-stringops
+Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented
+
+mno-fancy-math-387
+Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented
+
+mno-push-args
+Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented
+
+mno-red-zone
+Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented
+
+momit-leaf-frame-pointer
+Target Report Mask(OMIT_LEAF_FRAME_POINTER)
+Omit the frame pointer in leaf functions
+
+mpentium
+Target RejectNegative Undocumented
+;; Deprecated
+
+mpentiumpro
+Target RejectNegative Undocumented
+;; Deprecated
+
+mpreferred-stack-boundary=
+Target RejectNegative Joined Var(ix86_preferred_stack_boundary_string)
+Attempt to keep stack aligned to this power of 2
+
+mpush-args
+Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS)
+Use push instructions to save outgoing arguments
+
+mred-zone
+Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE)
+Use red-zone in the x86-64 code
+
+mregparm=
+Target RejectNegative Joined Var(ix86_regparm_string)
+Number of registers used to pass integer arguments
+
+mrtd
+Target Report Mask(RTD)
+Alternate calling convention
+
+msoft-float
+Target InverseMask(80387)
+Do not use hardware fp
+
+msse
+Target Report Mask(SSE)
+Support MMX and SSE built-in functions and code generation
+
+msse2
+Target Report Mask(SSE2)
+Support MMX, SSE and SSE2 built-in functions and code generation
+
+msse3
+Target Report Mask(SSE3)
+Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
+
+; APPLE LOCAL begin mainline
+mssse3
+Target Report Mask(SSSE3)
+Support SSSE3 built-in functions and code generation
+; APPLE LOCAL end mainline
+
+msseregparm
+Target RejectNegative Mask(SSEREGPARM)
+Use SSE register passing conventions for SF and DF mode
+
+mstackrealign
+Target Report Var(ix86_force_align_arg_pointer)
+Realign stack in prologue
+
+msvr3-shlib
+Target Report Mask(SVR3_SHLIB)
+Uninitialized locals in .bss
+
+mstack-arg-probe
+Target Report Mask(STACK_PROBE)
+Enable stack probing
+
+mtls-dialect=
+Target RejectNegative Joined Var(ix86_tls_dialect_string)
+Use given thread-local storage dialect
+
+mtls-direct-seg-refs
+Target Report Mask(TLS_DIRECT_SEG_REFS)
+Use direct references against %gs when accessing tls data
+
+mtune=
+Target RejectNegative Joined Var(ix86_tune_string)
+Schedule code for given CPU
+
+;; Support Athlon 3Dnow builtins
+Mask(3DNOW_A)
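+;; (A Mask() record with no option name, such as the one above, only
+;; allocates the MASK_3DNOW_A bit and its TARGET_3DNOW_A macro; the bit is
+;; turned on by CPU selection, e.g. -march=athlon, rather than by a -m flag.)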
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; Var(ix86_isa_flags)
+msse4.1
+Target Report Mask(SSE4_1) VarExists
+Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation
+
+;; Var(ix86_isa_flags)
+msse4.2
+Target Report Mask(SSE4_2) VarExists
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
+
+;; Var(ix86_isa_flags)
+msse4
+Target RejectNegative Report Mask(SSE4_2) MaskExists VarExists
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
+
+;; Var(ix86_isa_flags)
+mno-sse4
+Target RejectNegative Report InverseMask(SSE4_1) MaskExists VarExists
+Do not support SSE4.1 and SSE4.2 built-in functions and code generation
+
+;; Var(ix86_isa_flags)
+msse4a
+Target Report Mask(SSE4A) VarExists
+Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
+;; APPLE LOCAL end 5612787 mainline sse4
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/k6.md b/gcc-4.2.1-5666.3/gcc/config/i386/k6.md
new file mode 100644
index 000000000..e0006aebb
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/k6.md
@@ -0,0 +1,268 @@
+;; AMD K6/K6-2 Scheduling
+;; Copyright (C) 2002, 2004
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+;;
+;; The K6 architecture is quite similar to PPro.  An important difference is
+;; that there are only two decoders, and they seem to be much slower than
+;; any of the execution units.  So we have to pay much more attention to
+;; proper scheduling for the decoders.
+;; FIXME: We don't do that right now. A good start would be to sort the
+;; instructions based on length.
+;;
+;; This description is based on data from the following documents:
+;;
+;; "AMD-K6 Processor Data Sheet (Preliminary information)"
+;; Advanced Micro Devices, Inc., 1998.
+;;
+;; "AMD-K6 Processor Code Optimization Application Note"
+;; Advanced Micro Devices, Inc., 2000.
+;;
+;; CPU execution units of the K6:
+;;
+;; store describes the Store unit. This unit is not modelled
+;; completely and it is only used to model the lea operation.
+;; Otherwise it lies outside of any critical path.
+;; load describes the Load unit
+;; alux describes the Integer X unit
+;; mm describes the Multimedia unit, which shares a pipe
+;; with the Integer X unit. This unit is used for MMX,
+;; which is not implemented for K6.
+;; aluy describes the Integer Y unit
+;; fpu describes the FPU unit
+;; branch describes the Branch unit
+;;
+;; The fp unit is not pipelined, and it can only do one operation per two
+;; cycles, including fxch.
+;;
+;; Generally this is a very poor description, but at least no worse than
+;; the old description, and a lot easier to extend to something more
+;; reasonable if anyone still cares enough about this architecture in 2004.
+;;
+;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
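+;;
+;; A reading aid for the reservations below: "a,b,c" means the insn occupies
+;; unit a on its first cycle, then b, then c on successive cycles; "x|y"
+;; means either unit may be used; and "u*n" keeps unit u busy for n cycles.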
+
+(define_automaton "k6_decoder,k6_load_unit,k6_store_unit,k6_integer_units,k6_fpu_unit,k6_branch_unit")
+
+;; The K6 instruction decoding begins before the on-chip instruction cache is
+;; filled. Depending on the length of the instruction, two simple instructions
+;; can be decoded in two parallel short decoders, or one complex instruction can
+;; be decoded in either the long or the vector decoder. For all practical
+;; purposes, the long and vector decoder can be modelled as one decoder.
+(define_cpu_unit "k6_decode_short0" "k6_decoder")
+(define_cpu_unit "k6_decode_short1" "k6_decoder")
+(define_cpu_unit "k6_decode_long" "k6_decoder")
+(exclusion_set "k6_decode_long" "k6_decode_short0,k6_decode_short1")
+(define_reservation "k6_decode_short" "k6_decode_short0|k6_decode_short1")
+(define_reservation "k6_decode_vector" "k6_decode_long")
+
+(define_cpu_unit "k6_store" "k6_store_unit")
+(define_cpu_unit "k6_load" "k6_load_unit")
+(define_cpu_unit "k6_alux,k6_aluy" "k6_integer_units")
+(define_cpu_unit "k6_fpu" "k6_fpu_unit")
+(define_cpu_unit "k6_branch" "k6_branch_unit")
+
+;; Shift instructions and certain arithmetic are issued only on Integer X.
+(define_insn_reservation "k6_alux_only" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_alux")
+
+(define_insn_reservation "k6_alux_only_load" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load,k6_alux")
+
+(define_insn_reservation "k6_alux_only_store" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_long,k6_load,k6_alux,k6_store")
+
+;; Integer divide and multiply can only be issued on Integer X, too.
+(define_insn_reservation "k6_alu_imul" 2
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "imul"))
+ "k6_decode_vector,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_load" 4
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load")))
+ "k6_decode_vector,k6_load,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_store" 4
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_vector,k6_load,k6_alux*3,k6_store")
+
+;; ??? Guessed latencies based on the old pipeline description.
+(define_insn_reservation "k6_alu_idiv" 17
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none")))
+ "k6_decode_vector,k6_alux*17")
+
+(define_insn_reservation "k6_alu_idiv_mem" 19
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "!none")))
+ "k6_decode_vector,k6_load,k6_alux*17")
+
+;; Basic word and doubleword ALU ops can be issued on both Integer units.
+(define_insn_reservation "k6_alu" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_load" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_store" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_long,k6_load,k6_alux|k6_aluy,k6_store")
+
+;; A "load immediate" operation does not require execution at all,
+;; it is available immediately after decoding. Special-case this.
+(define_insn_reservation "k6_alu_imov" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "memory" "none")
+ (match_operand 1 "nonimmediate_operand"))))
+ "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_imov_imm" 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "memory" "none")
+ (match_operand 1 "immediate_operand"))))
+ "k6_decode_short")
+
+(define_insn_reservation "k6_alu_imov_load" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_alu_imov_store" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_alu_imov_both" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "both,unknown")))
+ "k6_decode_long,k6_load,k6_alux|k6_aluy")
+
+;; The branch unit.
+(define_insn_reservation "k6_branch_call" 1
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "call,callv"))
+ "k6_decode_vector,k6_branch")
+
+(define_insn_reservation "k6_branch_branch" 1
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "ibr"))
+ "k6_decode_short,k6_branch")
+
+;; The load and store units have two pipeline stages.  The load latency is
+;; two cycles.
+(define_insn_reservation "k6_load_pop" 3
+ (and (eq_attr "cpu" "k6")
+ (ior (eq_attr "type" "pop")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_load_leave" 5
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "leave"))
+ "k6_decode_long,k6_load,(k6_alux|k6_aluy)*2")
+
+;; ??? From the old pipeline description. Egad!
+;; ??? Apparently we take care of this reservation in adjust_cost.
+(define_insn_reservation "k6_load_str" 10
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_vector,k6_load*10")
+
+;; The store unit handles lea and push. It is otherwise unmodelled.
+(define_insn_reservation "k6_store_lea" 2
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "lea"))
+ "k6_decode_short,k6_store,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_store_push" 2
+ (and (eq_attr "cpu" "k6")
+ (ior (eq_attr "type" "push")
+ (eq_attr "memory" "store,both")))
+ "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_store_str" 10
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "str"))
+ "k6_store*10")
+
+;; Most FPU instructions have latency 2 and throughput 2.
+(define_insn_reservation "k6_fpu" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "none")))
+ "k6_decode_vector,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_load" 6
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_store" 6
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "store")))
+ "k6_decode_short,k6_store,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_fmul" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_fmul_load" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load,k6_fpu*2")
+
+;; ??? Guessed latencies from the old pipeline description.
+(define_insn_reservation "k6_fpu_expensive" 56
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "fdiv,fpspc"))
+ "k6_decode_short,k6_fpu*56")
+
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm b/gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm
new file mode 100644
index 000000000..c672024bb
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/lib1funcs.asm
@@ -0,0 +1,30 @@
+# APPLE LOCAL file 4099000
+#ifndef __x86_64__
+#define THUNK(REG) \
+.private_extern ___i686.get_pc_thunk.REG ;\
+___i686.get_pc_thunk.REG: ;\
+ movl (%esp,1),%REG ;\
+ ret ;
+
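+# These thunks let 32-bit position-independent code materialize its own PC.
+# For example (label name illustrative):
+#     call ___i686.get_pc_thunk.bx
+# L1:
+# leaves the address of L1 in %ebx, after which data can be addressed as
+# _sym-L1(%ebx).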
+#ifdef L_get_pc_thunk_ax
+THUNK(eax)
+#endif
+#ifdef L_get_pc_thunk_dx
+THUNK(edx)
+#endif
+#ifdef L_get_pc_thunk_cx
+THUNK(ecx)
+#endif
+#ifdef L_get_pc_thunk_bx
+THUNK(ebx)
+#endif
+#ifdef L_get_pc_thunk_si
+THUNK(esi)
+#endif
+#ifdef L_get_pc_thunk_di
+THUNK(edi)
+#endif
+#ifdef L_get_pc_thunk_bp
+THUNK(ebp)
+#endif
+#endif
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h b/gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h
new file mode 100644
index 000000000..7fdc6dce5
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/mm3dnow.h
@@ -0,0 +1,220 @@
+/* Copyright (C) 2004 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
+ MSVC 7.1. */
+
+#ifndef _MM3DNOW_H_INCLUDED
+#define _MM3DNOW_H_INCLUDED
+
+#ifdef __3dNOW__
+
+#include <mmintrin.h>
+
+/* Internal data types for implementing the intrinsics. */
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+static __inline void
+_m_femms (void)
+{
+ __builtin_ia32_femms();
+}
+
+static __inline __m64
+_m_pavgusb (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
+}
+
+static __inline __m64
+_m_pf2id (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
+}
+
+static __inline __m64
+_m_pfacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfadd (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfcmpeq (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfcmpge (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfcmpgt (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfmax (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfmin (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfmul (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfrcp (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
+}
+
+static __inline __m64
+_m_pfrcpit1 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfrcpit2 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfrsqrt (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
+}
+
+static __inline __m64
+_m_pfrsqit1 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfsub (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfsubr (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pi2fd (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
+}
+
+static __inline __m64
+_m_pmulhrw (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
+}
+
+static __inline void
+_m_prefetch (void *__P)
+{
+ __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
+}
+
+static __inline void
+_m_prefetchw (void *__P)
+{
+ __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
+}
+
+static __inline __m64
+_m_from_float (float __A)
+{
+ return (__m64)(__v2sf){ __A, 0 };
+}
+
+static __inline float
+_m_to_float (__m64 __A)
+{
+ union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A };
+ return __tmp.a[0];
+}
+
+#ifdef __3dNOW_A__
+
+static __inline __m64
+_m_pf2iw (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
+}
+
+static __inline __m64
+_m_pfnacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pfpnacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+static __inline __m64
+_m_pi2fw (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
+}
+
+static __inline __m64
+_m_pswapd (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
+}
+
+#endif /* __3dNOW_A__ */
+#endif /* __3dNOW__ */
+
+#endif /* _MM3DNOW_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h
new file mode 100644
index 000000000..64db0589c
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/mmintrin.h
@@ -0,0 +1,1219 @@
+/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+#ifndef __MMX__
+# error "MMX instruction set not enabled"
+#else
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+/* APPLE LOCAL 4505813 */
+typedef long long __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
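+/* With the redefinition above, every later __attribute__((__always_inline__))
+   in this header expands to __attribute__((__always_inline__, __nodebug__)),
+   so the intrinsics are force-inlined and omitted from debug information.  */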
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+/* Empty the multimedia state. */
+/* APPLE LOCAL begin radar 4152603 */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_empty (void)
+{
+ __builtin_ia32_emms ();
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_empty (void)
+{
+ _mm_empty ();
+}
+
+/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi32_si64 (int __i)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_from_int (int __i)
+{
+ return _mm_cvtsi32_si64 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert I to a __m64 object. */
+
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_from_int64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64_m64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64x_si64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_pi64x (long long __i)
+{
+ return (__m64) __i;
+}
+#endif
+
+/* Convert the lower 32 bits of the __m64 object into an integer. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64_si32 (__m64 __i)
+{
+ return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_to_int (__m64 __i)
+{
+ return _mm_cvtsi64_si32 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert the __m64 object to a 64bit integer. */
+
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_to_int64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtm64_si64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64_si64x (__m64 __i)
+{
+ return (long long)__i;
+}
+#endif
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with signed saturation. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_packs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
+}
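+/* Worked example (illustrative values): packing the 16-bit lanes
+   {1000, -1000, 5, -5} with themselves yields the 8-bit lanes
+   {127, -128, 5, -5, 127, -128, 5, -5}; each lane is clamped to the signed
+   8-bit range before being narrowed.  */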
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_packsswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi16 (__m1, __m2);
+}
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+ the result, and the two 32-bit values from M2 into the upper two 16-bit
+ values of the result, all with signed saturation. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_packssdw (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi32 (__m1, __m2);
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with unsigned saturation. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_packuswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pu16 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the high half of M1 with the four
+ 8-bit values from the high half of M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
+}
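+/* Worked example (illustrative values): with M1 = {a0,...,a7} and
+   M2 = {b0,...,b7}, element 0 being the lowest byte, the result is
+   {a4, b4, a5, b5, a6, b6, a7, b7}.  */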
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_punpckhbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the high half of M1 with the two
+ 16-bit values from the high half of M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_punpckhwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the high half of M1 with the 32-bit
+ value from the high half of M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_punpckhdq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi32 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the low half of M1 with the four
+ 8-bit values from the low half of M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_punpcklbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the low half of M1 with the two
+ 16-bit values from the low half of M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_punpcklwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the low half of M1 with the 32-bit
+ value from the low half of M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_punpckldq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi32 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddb (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddw (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi16 (__m1, __m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddd (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi32 (__m1, __m2);
+}
+
+/* Add the 64-bit values in M1 to the 64-bit values in M2. */
+#ifdef __SSE2__
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_si64 (__m64 __m1, __m64 __m2)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_paddq (__m1, __m2);
+}
+#endif
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
+ saturated arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
+ saturated arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi16 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
+ saturated arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
+}
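+/* Worked example (illustrative values): adding the unsigned 8-bit lanes 200
+   and 100 yields 255 rather than wrapping around to 44, since the sum
+   saturates at the unsigned 8-bit maximum.  */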
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
+ saturated arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_paddusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubb (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubw (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi16 (__m1, __m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubd (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi32 (__m1, __m2);
+}
+
+/* Subtract the 64-bit value in M2 from the 64-bit value in M1.  */
+#ifdef __SSE2__
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_si64 (__m64 __m1, __m64 __m2)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psubq (__m1, __m2);
+}
+#endif
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
+ saturating arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ signed saturating arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
+ unsigned saturating arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ unsigned saturating arithmetic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_subs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psubusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+ four 32-bit intermediate results, which are then summed by pairs to
+ produce two 32-bit results. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmaddwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_madd_pi16 (__m1, __m2);
+}
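As a concrete reading of the description above, a small sketch of the pairwise multiply-add (illustration only; assumes an MMX-enabled build and inspects the two 32-bit results through a union):

    #include <mmintrin.h>
    #include <stdio.h>

    int main (void)
    {
      union { __m64 v; int d[2]; } r;
      __m64 a = _mm_set_pi16 (4, 3, 2, 1);   /* lanes, low to high: 1 2 3 4 */
      __m64 b = _mm_set_pi16 (8, 7, 6, 5);   /* lanes, low to high: 5 6 7 8 */
      r.v = _mm_madd_pi16 (a, b);
      _mm_empty ();
      printf ("%d %d\n", r.d[0], r.d[1]);    /* 1*5 + 2*6 = 17, 3*7 + 4*8 = 53 */
      return 0;
    }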
+
+/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
+ M2 and produce the high 16 bits of the 32-bit results. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmulhw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mulhi_pi16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
+ the low 16 bits of the results. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmullw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mullo_pi16 (__m1, __m2);
+}
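_mm_mulhi_pi16 and _mm_mullo_pi16 together recover the full 32-bit products; interleaving the two halves with the unpack intrinsics defined earlier in this header is the usual idiom. A hedged sketch, with full_products being a hypothetical helper name:

    #include <mmintrin.h>

    /* Widen the four signed 16-bit products of A and B to 32 bits.
       *LO receives the products of lanes 0 and 1, *HI those of lanes 2 and 3.  */
    static void
    full_products (__m64 a, __m64 b, __m64 *lo, __m64 *hi)
    {
      __m64 low  = _mm_mullo_pi16 (a, b);    /* low 16 bits of each product */
      __m64 high = _mm_mulhi_pi16 (a, b);    /* high 16 bits, signed        */
      *lo = _mm_unpacklo_pi16 (low, high);   /* lanes 0,1 as 32-bit values  */
      *hi = _mm_unpackhi_pi16 (low, high);   /* lanes 2,3 as 32-bit values  */
    }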
+
+/* Shift four 16-bit values in M left by COUNT. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_pi16 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psllw (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi16 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_pi16 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psllwi (__m64 __m, int __count)
+{
+ return _mm_slli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M left by COUNT. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_pi32 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pslld (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi32 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_pi32 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pslldi (__m64 __m, int __count)
+{
+ return _mm_slli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M left by COUNT. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sll_si64 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psllq (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psllq (__m64 __m, __m64 __count)
+{
+ return _mm_sll_si64 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_slli_si64 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psllqi (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psllqi (__m64 __m, int __count)
+{
+ return _mm_slli_si64 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sra_pi16 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psraw (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi16 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srai_pi16 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrawi (__m64 __m, int __count)
+{
+ return _mm_srai_pi16 (__m, __count);
+}
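The immediate forms are handy for scaling: an arithmetic right shift by N divides each signed 16-bit lane by 2**N, rounding toward negative infinity, while a left shift multiplies by 2**N. A small sketch (rescale is an illustrative name, not part of the header):

    #include <mmintrin.h>

    /* Scale four signed 16-bit samples down by 16 and back up by 2.  */
    static __m64
    rescale (__m64 samples)
    {
      __m64 t = _mm_srai_pi16 (samples, 4);  /* arithmetic shift keeps the sign */
      return _mm_slli_pi16 (t, 1);           /* shift left: multiply by 2       */
    }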
+
+/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sra_pi32 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrad (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi32 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srai_pi32 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psradi (__m64 __m, int __count)
+{
+ return _mm_srai_pi32 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srl_pi16 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrlw (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi16 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_pi16 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrlwi (__m64 __m, int __count)
+{
+ return _mm_srli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srl_pi32 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrld (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi32 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_pi32 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrldi (__m64 __m, int __count)
+{
+ return _mm_srli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in zeros.  */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srl_si64 (__m64 __m, __m64 __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrlq (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrlq (__m64 __m, __m64 __count)
+{
+ return _mm_srl_si64 (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_srli_si64 (__m64 __m, int __count)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return (__m64) __builtin_ia32_psrlqi (__m, __count);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psrlqi (__m64 __m, int __count)
+{
+ return _mm_srli_si64 (__m, __count);
+}
+
+/* Bit-wise AND the 64-bit values in M1 and M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pand (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pand (__m64 __m1, __m64 __m2)
+{
+ return _mm_and_si64 (__m1, __m2);
+}
+
+/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
+ 64-bit value in M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_andnot_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pandn (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pandn (__m64 __m1, __m64 __m2)
+{
+ return _mm_andnot_si64 (__m1, __m2);
+}
+
+/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_por (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_por (__m64 __m1, __m64 __m2)
+{
+ return _mm_or_si64 (__m1, __m2);
+}
+
+/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pxor (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pxor (__m64 __m1, __m64 __m2)
+{
+ return _mm_xor_si64 (__m1, __m2);
+}
+
+/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
+ test is true and zero if false. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pcmpeqb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi8 (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pcmpgtb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi8 (__m1, __m2);
+}
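Because a comparison yields all-ones or all-zero lanes, it combines with the bit-wise operations defined above into a per-lane select. A sketch of the classic idiom (max_pi8 is an illustrative helper; plain MMX has no packed signed-byte maximum instruction):

    #include <mmintrin.h>

    /* Per-lane maximum of eight signed bytes, built from a compare mask.  */
    static __m64
    max_pi8 (__m64 a, __m64 b)
    {
      __m64 mask = _mm_cmpgt_pi8 (a, b);              /* 0xFF where a > b   */
      return _mm_or_si64 (_mm_and_si64 (mask, a),     /* keep a where a > b */
                          _mm_andnot_si64 (mask, b)); /* keep b elsewhere   */
    }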
+
+/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
+ the test is true and zero if false. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pcmpeqw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi16 (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pcmpgtw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi16 (__m1, __m2);
+}
+
+/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
+ the test is true and zero if false. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pcmpeqd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi32 (__m1, __m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pcmpgtd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi32 (__m1, __m2);
+}
+
+/* Creates a 64-bit zero. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setzero_si64 (void)
+{
+ return (__m64)0LL;
+}
+
+/* Creates a vector of two 32-bit values; I0 is least significant. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_pi32 (int __i1, int __i0)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
+}
+
+/* Creates a vector of four 16-bit values; W0 is least significant. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
+{
+ return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
+}
+
+/* Creates a vector of eight 8-bit values; B0 is least significant. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
+ char __b3, char __b2, char __b1, char __b0)
+{
+ return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
+ __b4, __b5, __b6, __b7);
+}
+
+/* Similar, but with the arguments in reverse order. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_pi32 (int __i0, int __i1)
+{
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
+{
+ return _mm_set_pi16 (__w3, __w2, __w1, __w0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
+ char __b4, char __b5, char __b6, char __b7)
+{
+ return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+}
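The _mm_set_* and _mm_setr_* families differ only in argument order, a frequent source of confusion; in the sketch below both calls build the same vector (same_vector is an illustrative name):

    #include <mmintrin.h>

    static __m64
    same_vector (void)
    {
      /* Both place 1 in the least significant 32-bit lane and 2 in the
         most significant one, so the XOR of the two is all zeros.  */
      __m64 a = _mm_set_pi32 (2, 1);    /* arguments listed high to low */
      __m64 b = _mm_setr_pi32 (1, 2);   /* arguments listed low to high */
      return _mm_xor_si64 (a, b);
    }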
+
+/* Creates a vector of two 32-bit values, both elements containing I. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_pi32 (int __i)
+{
+ return _mm_set_pi32 (__i, __i);
+}
+
+/* Creates a vector of four 16-bit values, all elements containing W. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_pi16 (short __w)
+{
+ return _mm_set_pi16 (__w, __w, __w, __w);
+}
+
+/* Creates a vector of eight 8-bit values, all elements containing B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_pi8 (char __b)
+{
+ return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
+}
+/* APPLE LOCAL end radar 4152603 */
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* __MMX__ */
+#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/mmx.md b/gcc-4.2.1-5666.3/gcc/config/i386/mmx.md
new file mode 100644
index 000000000..4e55cfdc2
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/mmx.md
@@ -0,0 +1,1470 @@
+;; GCC machine description for MMX and 3dNOW! instructions
+;; Copyright (C) 2005
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;; The MMX and 3dNOW! patterns are in the same file because they use
+;; the same register file, and 3dNOW! adds a number of extensions to
+;; the base integer MMX ISA.
+
+;; Note! Except for the basic move instructions, *all* of these
+;; patterns are outside the normal optabs namespace. This is because
+;; use of these registers requires the insertion of emms or femms
+;; instructions to return to normal FPU mode.  The compiler doesn't
+;; know how to do that itself, so it is up to the user; consequently,
+;; we should never use any of these patterns except at the direction
+;; of the user via a builtin.
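In user code built on these patterns (via the mmintrin.h builtins), that obligation usually takes the form of a call to _mm_empty (), which emits emms. A hedged sketch of the usual shape, with widen_add being an illustrative helper:

    #include <mmintrin.h>

    /* Zero-extend the low four bytes of A and B to 16 bits, add them,
       and store the four 16-bit sums.  The trailing _mm_empty () clears
       the MMX/x87 state so later x87 arithmetic sees a clean register stack.  */
    static void
    widen_add (__m64 a, __m64 b, __m64 *out)
    {
      __m64 zero = _mm_setzero_si64 ();
      *out = _mm_add_pi16 (_mm_unpacklo_pi8 (a, zero),
                           _mm_unpacklo_pi8 (b, zero));
      _mm_empty ();   /* emms: leave MMX mode before returning to FP code */
    }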
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+;; 8-byte integral modes handled by MMX (and by extension, SSE)
+(define_mode_macro MMXMODEI [V8QI V4HI V2SI V1DI])
+
+;; All 8-byte vector modes handled by MMX
+(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF V1DI])
+
+;; Mix-n-match
+(define_mode_macro MMXMODE12 [V8QI V4HI])
+(define_mode_macro MMXMODE24 [V4HI V2SI])
+(define_mode_macro MMXMODE124 [V8QI V4HI V2SI])
+(define_mode_macro MMXMODE248 [V4HI V2SI V1DI])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; All of these patterns are enabled for MMX as well as 3dNOW.
+;; This is essential for maintaining stable calling conventions.
+
+(define_expand "mov<mode>"
+ [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+;; Take {ym->y} into account for register allocation
+(define_insn "*mov<mode>_internal_rex64"
+ [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "=rm,r,*y,*y ,m ,*y,Yt,x,x ,m,r,x")
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ (match_operand:MMXMODEI 1 "vector_move_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "Cr ,m,C ,*ym,*y,Yt,*y,C,xm,x,x,r"))]
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ "TARGET_64BIT && TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+(define_insn "*mov<mode>_internal"
+;; APPLE LOCAL begin radar 4043818
+ [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "=*y,y ,m ,*y ,*Yt,*Yt,*Yt ,m ,*x,*x,*x,m ,?r ,?m")
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ (match_operand:MMXMODEI 1 "vector_move_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "C ,*ym,*y,*Yt,*y ,C ,*Ytm,*Yt,C ,*x,m ,*x,irm,r"))]
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+;; APPLE LOCAL end radar 4043818
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (V2SFmode, operands);
+ DONE;
+})
+
+(define_insn "*movv2sf_internal_rex64"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "=rm,r,*y ,*y ,m ,*y,Yt,x,x,x,m,r,x")
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ (match_operand:V2SF 1 "vector_move_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "Cr ,m ,C ,*ym,*y,Yt,*y,C,x,m,x,x,r"))]
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ "TARGET_64BIT && TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_insn "*movv2sf_internal"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "=*y,*y ,m,*y ,*Yt,*x,*x,*x,m ,?r ,?m")
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ (match_operand:V2SF 1 "vector_move_operand"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ "C ,*ym,*y,*Yt,*y ,C ,*x,m ,*x,irm,r"))]
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+;; %%% This multiword handling has got to go.
+(define_split
+ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODE 1 "general_operand" ""))]
+;; APPLE LOCAL begin 4099020
+ "!TARGET_64BIT && reload_completed
+ && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]) && GET_CODE (operands[0]) != SUBREG)
+ && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]) && GET_CODE (operands[1]) != SUBREG)"
+;; APPLE LOCAL end 4099020
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_expand "push<mode>1"
+ [(match_operand:MMXMODE 0 "register_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "sse_movntv1di"
+ [(set (match_operand:V1DI 0 "memory_operand" "=m")
+ (unspec:V1DI [(match_operand:V1DI 1 "register_operand" "y")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "movntq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxmov")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+ "pfadd\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y")
+ (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
+ "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pfsub\\t{%2, %0|%0, %2}
+ pfsubr\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_expand "mmx_subrv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "")
+
+(define_insn "mmx_mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+ "pfmul\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_smaxv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smax:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (SMAX, V2SFmode, operands)"
+ "pfmax\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_sminv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smin:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (SMIN, V2SFmode, operands)"
+ "pfmin\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCP))]
+ "TARGET_3DNOW"
+ "pfrcp\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT1))]
+ "TARGET_3DNOW"
+ "pfrcpit1\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpit2v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT2))]
+ "TARGET_3DNOW"
+ "pfrcpit2\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rsqrtv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQRT))]
+ "TARGET_3DNOW"
+ "pfrsqrt\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rsqit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQIT1))]
+ "TARGET_3DNOW"
+ "pfrsqit1\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_haddv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW"
+ "pfacc\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_hsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW_A"
+ "pfnacc\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_addsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_merge:V2SF
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym"))
+ (minus:V2SF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
+ "TARGET_3DNOW_A"
+ "pfpnacc\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_gtv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpgt\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_gev2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpge\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_eqv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
+ "pfcmpeq\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_pf2id"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pf2id\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_pf2iw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (sign_extend:V2SI
+ (ss_truncate:V2HI
+ (fix:V2SI
+ (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pf2iw\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_pi2fw"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF
+ (sign_extend:V2SI
+ (truncate:V2HI
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pi2fw\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_floatv2si2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pi2fd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_pswapdv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "*vec_dupv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_duplicate:V2SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_MMX"
+ "punpckldq\t%0, %0"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
+ (match_operand:SF 2 "vector_move_operand" "ym,C")))]
+ "TARGET_MMX && !TARGET_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt,mmxmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "vec_setv2sf"
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv2sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,y,m,m,frxy")
+ (vec_select:SF
+ (match_operand:V2SF 1 "nonimmediate_operand" " x,y,x,y,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "*vec_extractv2sf_1"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,frxy")
+ (vec_select:SF
+ (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ punpckhdq\t%0, %0
+ unpckhps\t%0, %0
+ #"
+ [(set_attr "type" "mmxcvt,sselog1,*")
+ (set_attr "mode" "DI,V4SF,SI")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (vec_select:SF
+ (match_operand:V2SF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && reload_completed"
+ [(const_int 0)]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 4);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_extractv2sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_add<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (plus:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "padd<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+;; remove mmx_adddi3
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+(define_insn "mmx_ssadd<mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ (ss_plus:MMXMODE12
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "padds<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_usadd<mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ (us_plus:MMXMODE12
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "paddus<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_sub<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (minus:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "psub<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+;; remove mmx_subdi3
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+(define_insn "mmx_sssub<mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ (ss_minus:MMXMODE12
+ (match_operand:MMXMODE12 1 "register_operand" "0")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "psubs<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ussub<mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ (us_minus:MMXMODE12
+ (match_operand:MMXMODE12 1 "register_operand" "0")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "psubus<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 16))))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 16))))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pmaddwd"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (plus:V2SI
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)]))))))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pmulhrwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V4SI [(const_int 32768) (const_int 32768)
+ (const_int 32768) (const_int 32768)]))
+ (const_int 16))))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrw\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "sse2_umulsidi3"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (mult:V1DI
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)])))
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+(define_insn "mmx_umaxv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (UMAX, V8QImode, operands)"
+ "pmaxub\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_smaxv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (SMAX, V4HImode, operands)"
+ "pmaxsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_uminv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (UMIN, V8QImode, operands)"
+ "pminub\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_sminv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (SMIN, V4HImode, operands)"
+ "pminsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "mmx_ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0")
+ (match_operand:V1DI 2 "nonmemory_operand" "yi")))]
+ "TARGET_MMX"
+ "psra<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashr<mode>2si"
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0")
+ (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" "yi"))))]
+ "TARGET_MMX"
+ "psra<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_lshr<mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (lshiftrt:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:V1DI 2 "nonmemory_operand" "yi")))]
+ "TARGET_MMX"
+ "psrl<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_lshr<mode>2si"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (lshiftrt:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" "yi"))))]
+ "TARGET_MMX"
+ "psrl<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashl<mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (ashift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:V1DI 2 "nonmemory_operand" "yi")))]
+ "TARGET_MMX"
+ "psll<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashl<mode>2si"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (ashift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" "yi"))))]
+ "TARGET_MMX"
+ "psll<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "mmx_eq<mode>3"
+ [(set (match_operand:MMXMODE124 0 "register_operand" "=y")
+ (eq:MMXMODE124
+ (match_operand:MMXMODE124 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODE124 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_gt<mode>3"
+ [(set (match_operand:MMXMODE124 0 "register_operand" "=y")
+ (gt:MMXMODE124
+ (match_operand:MMXMODE124 1 "register_operand" "0")
+ (match_operand:MMXMODE124 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_and<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (and:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+ "pand\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_nand<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ior<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (ior:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
+ "por\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_xor<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (xor:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+ "pxor\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")
+ (set_attr "memory" "none")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_packsswb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (ss_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_packssdw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI
+ (match_operand:V2SI 1 "register_operand" "0"))
+ (ss_truncate:V2HI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_packuswb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (us_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (us_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_MMX"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_MMX"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_MMX"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_MMX"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_MMX"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_MMX"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pinsrw"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (vec_merge:V4HI
+ (vec_duplicate:V4HI
+ (match_operand:SI 2 "nonimmediate_operand" ""))
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand:SI 3 "const_0_to_3_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[2] = gen_lowpart (HImode, operands[2]);
+ operands[3] = GEN_INT (1 << INTVAL (operands[3]));
+})
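+;; For example (illustrative): inserting into element 2 passes operand 3
+;; as (const_int 2) here; the expander rewrites it to the merge mask
+;; (1 << 2) = 4, and the insn pattern below recovers the element index
+;; with exact_log2 before emitting pinsrw.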
+
+(define_insn "*mmx_pinsrw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_merge:V4HI
+ (vec_duplicate:V4HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pextrw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pshufw"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
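+;; Worked example (illustrative): a mask of 0x1B (binary 00 01 10 11)
+;; yields the 2-bit selectors 3, 2, 1, 0 for result words 0..3, i.e. it
+;; reverses the vector; those selectors are what get passed to
+;; mmx_pshufw_1 below.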
+
+(define_insn "mmx_pshufw_1"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")])))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pswapdv2si2"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*vec_dupv4hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_duplicate:V4HI
+ (truncate:HI
+ (match_operand:SI 1 "register_operand" "0"))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pshufw\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*vec_dupv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_duplicate:V2SI
+ (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_MMX"
+ "punpckldq\t%0, %0"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_concatv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
+ (match_operand:SI 2 "vector_move_operand" "ym,C")))]
+ "TARGET_MMX && !TARGET_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt,mmxmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "vec_setv2si"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; APPLE LOCAL begin 4684674 permit mmx-to-int reg
+(define_insn_and_split "*vec_extractv2si_0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=x,y,m,mr,frxy")
+ (vec_select:SI
+ (match_operand:V2SI 1 "nonimmediate_operand" " x,y,x,y,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+;; APPLE LOCAL end 4684674
+
+(define_insn "*vec_extractv2si_1"
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Yt,Yt,x,frxy")
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ (vec_select:SI
+;; APPLE LOCAL begin mainline 2007-06-05 5103201
+ (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Yt,0,o")
+;; APPLE LOCAL end mainline 2007-06-05 5103201
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ punpckhdq\t%0, %0
+ punpckhdq\t%0, %0
+ pshufd\t{$85, %1, %0|%0, %1, 85}
+ unpckhps\t%0, %0
+ #"
+ [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,*")
+ (set_attr "mode" "DI,TI,TI,V4SF,SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (vec_select:SI
+ (match_operand:V2SI 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && reload_completed"
+ [(const_int 0)]
+{
+ operands[1] = adjust_address (operands[1], SImode, 4);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_extractv2si"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:V2SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2si"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4hi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv4hi"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv4hi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv8qi"
+ [(match_operand:QI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_uavgv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (truncate:V8QI
+ (lshiftrt:V8HI
+ (plus:V8HI
+ (plus:V8HI
+ (zero_extend:V8HI
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8HI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "(TARGET_SSE || TARGET_3DNOW)
+ && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
+{
+ /* These two instructions have the same operation, but their encoding
+ is different. Prefer the one that is de facto standard. */
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ return "pavgb\t{%2, %0|%0, %2}";
+ else
+ return "pavgusb\\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_uavgv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "mmx_psadbw"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
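+;; psadbw sums the absolute differences of the eight byte pairs of its
+;; operands into a single 16-bit value, zero-extended into the 64-bit
+;; destination; that is awkward to express directly in RTL, hence the
+;; unspec on V1DI above.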
+
+(define_insn "mmx_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_maskmovq"
+ [(set (match_operand:V8QI 0 "memory_operand" "")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "")
+
+(define_insn "*mmx_maskmovq"
+ [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\t{%2, %1|%1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_maskmovq_rex"
+ [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\t{%2, %1|%1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_emms"
+ [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
+ (clobber (reg:XF 8))
+ (clobber (reg:XF 9))
+ (clobber (reg:XF 10))
+ (clobber (reg:XF 11))
+ (clobber (reg:XF 12))
+ (clobber (reg:XF 13))
+ (clobber (reg:XF 14))
+ (clobber (reg:XF 15))
+ (clobber (reg:DI 29))
+ (clobber (reg:DI 30))
+ (clobber (reg:DI 31))
+ (clobber (reg:DI 32))
+ (clobber (reg:DI 33))
+ (clobber (reg:DI 34))
+ (clobber (reg:DI 35))
+ (clobber (reg:DI 36))]
+ "TARGET_MMX"
+ "emms"
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "unknown")])
+
+(define_insn "mmx_femms"
+ [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
+ (clobber (reg:XF 8))
+ (clobber (reg:XF 9))
+ (clobber (reg:XF 10))
+ (clobber (reg:XF 11))
+ (clobber (reg:XF 12))
+ (clobber (reg:XF 13))
+ (clobber (reg:XF 14))
+ (clobber (reg:XF 15))
+ (clobber (reg:DI 29))
+ (clobber (reg:DI 30))
+ (clobber (reg:DI 31))
+ (clobber (reg:DI 32))
+ (clobber (reg:DI 33))
+ (clobber (reg:DI 34))
+ (clobber (reg:DI 35))
+ (clobber (reg:DI 36))]
+ "TARGET_3DNOW"
+ "femms"
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h
new file mode 100644
index 000000000..5c0db207b
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/nmmintrin.h
@@ -0,0 +1,41 @@
+/* APPLE LOCAL file 5612787 mainline sse4 */
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _NMMINTRIN_H_INCLUDED
+#define _NMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4_2__
+# error "SSE4.2 instruction set not enabled"
+#else
+/* We just include SSE4.1 header file. */
+#include <smmintrin.h>
+#endif /* __SSE4_2__ */
+
+#endif /* _NMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/pentium.md b/gcc-4.2.1-5666.3/gcc/config/i386/pentium.md
new file mode 100644
index 000000000..1f994dd60
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/pentium.md
@@ -0,0 +1,312 @@
+;; Pentium Scheduling
+;; Copyright (C) 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA. */
+;;
+;; The Pentium is an in-order core with two integer pipelines.
+
+;; True for insns that behave like prefixed insns on the Pentium.
+(define_attr "pent_prefix" "false,true"
+ (if_then_else (ior (eq_attr "prefix_0f" "1")
+ (ior (eq_attr "prefix_data16" "1")
+ (eq_attr "prefix_rep" "1")))
+ (const_string "true")
+ (const_string "false")))
+
+;; Categorize how an instruction slots.
+
+;; The non-MMX Pentium slots an instruction with prefixes on the U pipe
+;; only, while the MMX Pentium can slot it on either U or V. We model
+;; the non-MMX Pentium rules, because doing so results in noticeably
+;; better code on the non-MMX Pentium and doesn't hurt much on the MMX
+;; one. (Prefixed instructions are not very common, so the scheduler
+;; usually has a non-prefixed insn to pair with.)
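+;; The pairing classes below follow the usual Pentium U/V-pipe pairing
+;; terminology: "uv" insns can issue and pair in either pipe, "pu" insns
+;; pair only when issued in the U pipe, "pv" insns pair only in the V
+;; pipe, and "np" insns do not pair at all.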
+
+(define_attr "pent_pair" "uv,pu,pv,np"
+ (cond [(eq_attr "imm_disp" "true")
+ (const_string "np")
+ (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec")
+ (and (eq_attr "type" "pop,push")
+ (eq_attr "memory" "!both")))
+ (if_then_else (eq_attr "pent_prefix" "true")
+ (const_string "pu")
+ (const_string "uv"))
+ (eq_attr "type" "ibr")
+ (const_string "pv")
+ (and (eq_attr "type" "ishift")
+ (match_operand 2 "const_int_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "rotate")
+ (match_operand 2 "const1_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "ishift1")
+ (match_operand 1 "const_int_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "rotate1")
+ (match_operand 1 "const1_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_string "pv")
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_string "pv")
+ ]
+ (const_string "np")))
+
+(define_automaton "pentium,pentium_fpu")
+
+;; The Pentium has U and V pipes. Instructions for the two pipes
+;; are always issued together, much as on a VLIW machine.
+;;
+;; predecode
+;; / \
+;; decodeu decodev
+;; / | |
+;; fpu executeu executev
+;; | | |
+;; fpu retire retire
+;; |
+;; fpu
+;; We add dummy "port" pipes, allocated only in the first cycle of an
+;; instruction, to model this behavior.
+
+(define_cpu_unit "pentium-portu,pentium-portv" "pentium")
+(define_cpu_unit "pentium-u,pentium-v" "pentium")
+(absence_set "pentium-portu" "pentium-u,pentium-v")
+(presence_set "pentium-portv" "pentium-portu")
+
+;; Floating point instructions can overlap with newly issued integer
+;; instructions. We model only the first cycle of the FP pipeline, as
+;; it is fully pipelined.
+(define_cpu_unit "pentium-fp" "pentium_fpu")
+
+;; There is a non-pipelined multiplier unit used for complex operations.
+(define_cpu_unit "pentium-fmul" "pentium_fpu")
+
+;; The Pentium preserves memory ordering, so when a load-execute-store
+;; instruction is executed together with another instruction that loads
+;; data, execution of the other instruction is delayed until the very
+;; last cycle of the first one, when its data are bypassed.
+;; We model this by allocating the "memory" unit while a store is
+;; pending and making it conflict with the load units.
+
+(define_cpu_unit "pentium-memory" "pentium")
+(define_cpu_unit "pentium-load0" "pentium")
+(define_cpu_unit "pentium-load1" "pentium")
+(absence_set "pentium-load0,pentium-load1" "pentium-memory")
+
+(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
+(define_reservation "pentium-np" "(pentium-u + pentium-v)")
+(define_reservation "pentium-uv" "(pentium-u | pentium-v)")
+(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
+(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
+(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
+(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
+(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
+(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
+(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
+ + pentium-memory)")
+(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv
+ + pentium-memory)")
+(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
+ + pentium-memory)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+
+;; A few common long-latency instructions
+(define_insn_reservation "pent_mul" 11
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "imul"))
+ "pentium-np*11")
+
+(define_insn_reservation "pent_str" 12
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "str"))
+ "pentium-np*12")
+
+;; Integer division and some other long-latency instructions block all
+;; units, including the FP pipe. There is no value in modeling the
+;; latency of these instructions, and not modeling it decreases the
+;; size of the DFA.
+(define_insn_reservation "pent_block" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "idiv"))
+ "pentium-np+pentium-fp")
+
+(define_insn_reservation "pent_cld" 2
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "cld"))
+ "pentium-np*2")
+
+;; Moves usually have one cycle penalty, but there are exceptions.
+(define_insn_reservation "pent_fmov" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none,load")))
+ "(pentium-fp+pentium-np)")
+
+(define_insn_reservation "pent_fpmovxf" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF"))))
+ "(pentium-fp+pentium-np)*3")
+
+(define_insn_reservation "pent_fpstore" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (ior (match_operand 1 "immediate_operand" "")
+ (eq_attr "memory" "store"))))
+ "(pentium-fp+pentium-np)*2")
+
+(define_insn_reservation "pent_imov" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "imov"))
+ "pentium-firstuv")
+
+;; Push and pop instructions have 1 cycle latency, and a special
+;; hardware bypass allows them to be paired with other push, pop
+;; and call instructions.
+(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
+(define_insn_reservation "pent_push" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "push")
+ (eq_attr "memory" "store")))
+ "pentium-firstuv")
+
+(define_insn_reservation "pent_pop" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "pop,leave"))
+ "pentium-firstuv")
+
+;; Call and branch instructions can execute in either pipe, but
+;; they are only pairable when in the V pipe.
+(define_insn_reservation "pent_call" 10
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "call,callv"))
+ "pentium-firstv,pentium-v*9")
+
+(define_insn_reservation "pent_branch" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "ibr"))
+ "pentium-firstv")
+
+;; Floating point instructions dispatch in the U pipe, but continue
+;; in the FP pipeline, allowing other instructions to be executed.
+(define_insn_reservation "pent_fp" 3
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fop,fistp"))
+ "(pentium-firstu+pentium-fp),nothing,nothing")
+
+;; First two cycles of fmul are not pipelined.
+(define_insn_reservation "pent_fmul" 3
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fmul"))
+ "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
+
+;; Long-latency FP instructions overlap with integer instructions,
+;; but overlap only their last 2 cycles with other FP instructions.
+(define_insn_reservation "pent_fdiv" 39
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fdiv"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*36,pentium-fmul*2")
+
+(define_insn_reservation "pent_fpspc" 70
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fpspc"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*67,pentium-fmul*2")
+
+;; Integer instructions. Load/execute/store takes 3 cycles,
+;; load/execute 2 cycles, and execute-only one cycle.
+(define_insn_reservation "pent_uv_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "both")))
+ "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
+
+(define_insn_reservation "pent_u_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "both")))
+ "pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
+
+(define_insn_reservation "pent_v_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "both")))
+ "pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
+
+(define_insn_reservation "pent_np_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "both")))
+ "pentium-np,pentium-np,pentium-np")
+
+(define_insn_reservation "pent_uv_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "load")))
+ "pentium-firstuvload,pentium-uv")
+
+(define_insn_reservation "pent_u_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "load")))
+ "pentium-firstuload,pentium-u")
+
+(define_insn_reservation "pent_v_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "load")))
+ "pentium-firstvload,pentium-v")
+
+(define_insn_reservation "pent_np_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "load")))
+ "pentium-np,pentium-np")
+
+(define_insn_reservation "pent_uv" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "none")))
+ "pentium-firstuv")
+
+(define_insn_reservation "pent_u" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "none")))
+ "pentium-firstu")
+
+(define_insn_reservation "pent_v" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "none")))
+ "pentium-firstv")
+
+(define_insn_reservation "pent_np" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "none")))
+ "pentium-np")
+
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h
new file mode 100644
index 000000000..764094186
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/pmmintrin.h
@@ -0,0 +1,172 @@
+/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */
+/* Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _PMMINTRIN_H_INCLUDED
+#define _PMMINTRIN_H_INCLUDED
+
+#ifdef __SSE3__
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+/* Additional bits in the MXCSR. */
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#define _MM_DENORMALS_ZERO_OFF 0x0000
+
+#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
+ _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
+#define _MM_GET_DENORMALS_ZERO_MODE() \
+ (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
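+
+/* Illustrative usage only (not part of the original header); assumes a
+   translation unit compiled with -msse3 so this header is available:
+
+     #include <pmmintrin.h>
+
+     void
+     flush_denormals_to_zero (void)
+     {
+       _MM_SET_DENORMALS_ZERO_MODE (_MM_DENORMALS_ZERO_ON);
+     }
+
+   _MM_GET_DENORMALS_ZERO_MODE () then reads back _MM_DENORMALS_ZERO_ON
+   until the mode is cleared again with _MM_DENORMALS_ZERO_OFF.  */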
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+/* APPLE LOCAL begin radar 4152603 */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_addsub_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadd_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
+}
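+
+/* Illustrative semantics (not part of the original header): with
+   __m128 a = { a0, a1, a2, a3 } and __m128 b = { b0, b1, b2, b3 },
+   _mm_hadd_ps (a, b) yields { a0+a1, a2+a3, b0+b1, b2+b3 }, and
+   _mm_hsub_ps below yields { a0-a1, a2-a3, b0-b1, b2-b3 }.  */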
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsub_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movehdup_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_moveldup_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_addsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadd_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loaddup_pd (double const *__P)
+{
+ return _mm_load1_pd (__P);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movedup_pd (__m128d __X)
+{
+ return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_lddqu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_monitor (__P, __E, __H);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mwait (unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_mwait (__E, __H);
+}
+/* APPLE LOCAL end radar 4152603 */
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* __SSE3__ */
+
+#endif /* _PMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/ppro.md b/gcc-4.2.1-5666.3/gcc/config/i386/ppro.md
new file mode 100644
index 000000000..3e31eb336
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/ppro.md
@@ -0,0 +1,763 @@
+;; Scheduling for the Intel P6 family of processors
+;; Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA. */
+
+;; The P6 family includes the Pentium Pro, Pentium II, Pentium III, Celeron
+;; and Xeon lines of CPUs. The DFA scheduler description in this file is
+;; based on information that can be found in the following three documents:
+;;
+;; "P6 Family of Processors Hardware Developer's Manual",
+;; Intel, September 1999.
+;;
+;; "Intel Architecture Optimization Manual",
+;; Intel, 1999 (Order Number: 245127-001).
+;;
+;; "How to optimize for the Pentium family of microprocessors",
+;; by Agner Fog, PhD.
+;;
+;; The P6 pipeline has three major components:
+;; 1) the FETCH/DECODE unit, an in-order issue front-end
+;; 2) the DISPATCH/EXECUTE unit, which is the out-of-order core
+;; 3) the RETIRE unit, an in-order retirement unit
+;;
+;; So, the P6 CPUs have out-of-order cores, but the instruction decoder and
+;; retirement unit are naturally in-order.
+;;
+;; BUS INTERFACE UNIT
+;; / \
+;; L1 ICACHE L1 DCACHE
+;; / | \ | \
+;; DECODER0 DECODER1 DECODER2 DISP/EXEC RETIRE
+;; \ | / | |
+;; INSTRUCTION POOL __________|_______/
+;; (inc. reorder buffer)
+;;
+;; Since the P6 CPUs execute instructions out-of-order, the most important
+;; consideration in performance tuning is making sure enough micro-ops are
+;; ready for execution in the out-of-order core, while not stalling the
+;; decoder.
+;;
+;; TODO:
+;; - Find a less crude way to model complex instructions, in
+;; particular how many cycles they take to be decoded.
+;; - Include decoder latencies in the total reservation latencies.
+;; This isn't necessary right now because we assume for every
+;; instruction that it never blocks a decoder.
+;; - Figure out where the p0 and p1 reservations come from. These
+;; appear not to be in the manual (e.g. why is cld "(p0+p1)*2"
+;; better than "(p0|p1)*4" ???)
+;; - Lots more because I'm sure this is still far from optimal :-)
+
+;; The ppro_idiv and ppro_fdiv automata are used to model issue
+;; latencies of idiv and fdiv type insns.
+(define_automaton "ppro_decoder,ppro_core,ppro_idiv,ppro_fdiv,ppro_load,ppro_store")
+
+;; Simple instructions of the register-register form have only one uop.
+;; Load instructions are also only one uop. Store instructions decode to
+;; two uops, and simple read-modify instructions also take two uops.
+;; Simple instructions of the register-memory form have two to three uops.
+;; Simple read-modify-write instructions have four uops. The rules for
+;; the decoder are simple:
+;; - an instruction with 1 uop can be decoded by any of the three
+;; decoders in one cycle.
+;; - an instruction with 1 to 4 uops can be decoded only by decoder 0
+;; but still in only one cycle.
+;; - a complex (microcode) instruction can also only be decoded by
+;; decoder 0, and this takes an unspecified number of cycles.
+;;
+;; The goal is to schedule such that we have a 4-1-1 (multi-uop,
+;; single-uop, single-uop) sequence in each cycle, to decode as many
+;; instructions per cycle as possible.
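+;; For example (illustrative): a load-op instruction (2 uops) followed
+;; by two reg-reg instructions (1 uop each) can decode in a single
+;; cycle as a 2-1-1 group, whereas issuing the multi-uop instruction
+;; second forces it to wait for decoder 0 in the next cycle.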
+(define_cpu_unit "decoder0" "ppro_decoder")
+(define_cpu_unit "decoder1" "ppro_decoder")
+(define_cpu_unit "decoder2" "ppro_decoder")
+
+;; We first wish to find an instruction for decoder0, so exclude
+;; decoder1 and decoder2 from being reserved until decoder 0 is
+;; reserved.
+(presence_set "decoder1" "decoder0")
+(presence_set "decoder2" "decoder0")
+
+;; Most instructions can be decoded on any of the three decoders.
+(define_reservation "decodern" "(decoder0|decoder1|decoder2)")
+
+;; The out-of-order core has five pipelines. During each cycle, the core
+;; may dispatch zero or one uop on the port of any of the five pipelines,
+;; so the maximum number of dispatched uops per cycle is 5. In practice,
+;; 3 uops per cycle is more realistic.
+;;
+;; Two of the five pipelines contain several execution units:
+;;
+;; Port 0 Port 1 Port 2 Port 3 Port 4
+;; ALU ALU LOAD SAC SDA
+;; FPU JUE
+;; AGU MMX
+;; MMX P3FPU
+;; P3FPU
+;;
+;; (SAC=Store Address Calculation, SDA=Store Data Unit, P3FPU = SSE unit,
+;; JUE = Jump Execution Unit, AGU = Address Generation Unit)
+;;
+(define_cpu_unit "p0,p1" "ppro_core")
+(define_cpu_unit "p2" "ppro_load")
+(define_cpu_unit "p3,p4" "ppro_store")
+(define_cpu_unit "idiv" "ppro_idiv")
+(define_cpu_unit "fdiv" "ppro_fdiv")
+
+;; Only the irregular instructions have to be modeled here. A load
+;; increases the latency by 2 or 3, or by nothing if the manual gives
+;; a latency already. Store latencies are not accounted for.
+;;
+;; The simple instructions follow a very regular pattern of 1 uop per
+;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store
+;; on port 4 and port 3. These instructions are modelled at the bottom
+;; of this file.
+;;
+;; For microcoded instructions we don't know how many uops are produced.
+;; These instructions are the "complex" ones in the Intel manuals. All
+;; we _do_ know is that they typically produce four or more uops, so
+;; they can only be decoded on decoder0. Modelling their latencies
+;; doesn't make sense because we don't know how these instructions are
+;; executed in the core. So we just model that they can only be decoded
+;; on decoder 0, and say that it takes a little while before the result
+;; is available.
+(define_insn_reservation "ppro_complex_insn" 6
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (eq_attr "type" "other,multi,call,callv,str"))
+ "decoder0")
+
+;; imov with memory operands does not use the integer units.
+(define_insn_reservation "ppro_imov" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imov")))
+ "decodern,(p0|p1)")
+
+(define_insn_reservation "ppro_imov_load" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imov")))
+ "decodern,p2")
+
+(define_insn_reservation "ppro_imov_store" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "imov")))
+ "decoder0,p4+p3")
+
+;; imovx always decodes to one uop, and also doesn't use the integer
+;; units if it has memory operands.
+(define_insn_reservation "ppro_imovx" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imovx")))
+ "decodern,(p0|p1)")
+
+(define_insn_reservation "ppro_imovx_load" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imovx")))
+ "decodern,p2")
+
+;; lea executes on port 0 with latency one and throughput 1.
+(define_insn_reservation "ppro_lea" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "lea")))
+ "decodern,p0")
+
+;; Shift and rotate execute on port 0 with latency and throughput 1.
+;; The load and store units need to be reserved when memory operands
+;; are involved.
+(define_insn_reservation "ppro_shift_rotate" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_shift_rotate_mem" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "decoder0,p2+p0,p4+p3")
+
+(define_insn_reservation "ppro_cld" 2
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (eq_attr "type" "cld"))
+ "decoder0,(p0+p1)*2")
+
+;; The P6 has a sophisticated branch prediction mechanism to minimize
+;; latencies due to branching. In particular, it has a fast way to
+;; execute branches that are taken multiple times (such as in loops).
+;; Branches not taken suffer no penalty, and correctly predicted
+;; branches cost only one fetch cycle. Mispredicted branches are very
+;; costly: typically 15 cycles and possibly as many as 26 cycles.
+;;
+;; Unfortunately all this makes it quite difficult to properly model
+;; the latencies for the compiler. Here I've made the choice to be
+;; optimistic and assume branches are often predicted correctly, so
+;; they have latency 1, and the decoders are not blocked.
+;;
+;; In addition, the model assumes a branch always decodes to only 1 uop,
+;; which is not exactly true because there are a few instructions that
+;; decode to 2 uops or microcode. But this probably gives the best
+;; results because we can assume these instructions can decode on all
+;; decoders.
+(define_insn_reservation "ppro_branch" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ibr")))
+ "decodern,p1")
+
+;; ??? Indirect branches probably have worse latency than this.
+(define_insn_reservation "ppro_indirect_branch" 6
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ibr")))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_leave" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (eq_attr "type" "leave"))
+ "decoder0,p2+(p0|p1),(p0|p1)")
+
+;; imul has throughput one, but latency 4, and can only execute on port 0.
+(define_insn_reservation "ppro_imul" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imul")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_imul_mem" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "imul")))
+ "decoder0,p2+p0")
+
+;; div and idiv are very similar, so we model them the same.
+;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
+;; These issue latencies are modelled via the ppro_idiv automaton.
+(define_insn_reservation "ppro_idiv_QI" 19
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*2,(p0|p1)+idiv,idiv*9")
+
+(define_insn_reservation "ppro_idiv_QI_load" 19
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*9")
+
+(define_insn_reservation "ppro_idiv_HI" 23
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*17")
+
+(define_insn_reservation "ppro_idiv_HI_load" 23
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*18")
+
+(define_insn_reservation "ppro_idiv_SI" 39
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*33")
+
+(define_insn_reservation "ppro_idiv_SI_load" 39
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*34")
+
+;; Floating point operations always execute on port 0.
+;; ??? where do these latencies come from? fadd has latency 3 and
+;; has throughput "1/cycle (align with FADD)". What do they
+;; mean and how can we model that?
+(define_insn_reservation "ppro_fop" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "fop")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fop_load" 5
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fop")))
+ "decoder0,p2+p0,p0")
+
+(define_insn_reservation "ppro_fop_store" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "fop")))
+ "decoder0,p0,p0,p0+p4+p3")
+
+(define_insn_reservation "ppro_fop_both" 5
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "fop")))
+ "decoder0,p2+p0,p0+p4+p3")
+
+(define_insn_reservation "ppro_fsgn" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (eq_attr "type" "fsgn"))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fistp" 5
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (eq_attr "type" "fistp"))
+ "decoder0,p0*2,p4+p3")
+
+(define_insn_reservation "ppro_fcmov" 2
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (eq_attr "type" "fcmov"))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_fcmp" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fcmp")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fcmp_load" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fcmp")))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_fmov" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmov")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fmov_load" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "decodern,p2")
+
+(define_insn_reservation "ppro_fmov_XF_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "decoder0,(p2+p0)*2")
+
+(define_insn_reservation "ppro_fmov_store" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fmov_XF_store" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "decoder0,(p0+p4),(p0+p3)")
+
+;; fmul executes on port 0 with latency 5. It has issue latency 2,
+;; but we don't model this.
+(define_insn_reservation "ppro_fmul" 5
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmul")))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_fmul_load" 6
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fmul")))
+ "decoder0,p2+p0,p0")
+
+;; fdiv latencies depend on the mode of the operands. XFmode gives
+;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
+;; Division by a power of 2 takes only 9 cycles, but we cannot model
+;; that. Throughput is equal to latency - 1, which we model using the
+;; ppro_fdiv automaton.
+(define_insn_reservation "ppro_fdiv_SF" 18
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*16")
+
+(define_insn_reservation "ppro_fdiv_SF_load" 19
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*16")
+
+(define_insn_reservation "ppro_fdiv_DF" 32
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*30")
+
+(define_insn_reservation "ppro_fdiv_DF_load" 33
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*30")
+
+(define_insn_reservation "ppro_fdiv_XF" 38
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*36")
+
+(define_insn_reservation "ppro_fdiv_XF_load" 39
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*36")
+
+;; MMX instructions can execute on either port 0 or port 1 with a
+;; throughput of 1/cycle.
+;; on port 0: - ALU (latency 1)
+;; - Multiplier Unit (latency 3)
+;; on port 1: - ALU (latency 1)
+;; - Shift Unit (latency 1)
+;;
+;; MMX instructions are either reg-reg or read-modify, and except for
+;; mmxshft and mmxmul they can execute on port 0 or port 1, so they
+;; behave as "simple" instructions that need no special modelling.
+;; We only have to model mmxshft and mmxmul.
+(define_insn_reservation "ppro_mmx_shft" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxshft")))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_mmx_shft_load" 2
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxshft")))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_mmx_mul" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_mmx_mul_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxmul")))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_sse_mmxcvt" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "mmxcvt")))
+ "decodern,p1")
+
+;; FIXME: These are Pentium III only, but we cannot tell here if
+;; we're generating code for PentiumPro/Pentium II or Pentium III
+;; (define_insn_reservation "ppro_sse_mmxshft" 2
+;; (and (eq_attr "cpu" "pentiumpro,generic32")
+;; (and (eq_attr "mode" "DI")
+;; (eq_attr "type" "mmxshft")))
+;; "decodern,p0")
+
+;; SSE is very complicated, and takes a bit more effort.
+;; ??? I assumed that all SSE instructions decode on decoder0,
+;; but is this correct?
+
+;; The sfence instruction.
+(define_insn_reservation "ppro_sse_sfence" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "unknown")
+ (eq_attr "type" "sse")))
+ "decoder0,p4+p3")
+
+;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
+(define_insn_reservation "ppro_sse_SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sse")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_add_SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseadd"))))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_sse_add_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_sse_cmp_SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1")
+
+(define_insn_reservation "ppro_sse_cmp_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_sse_comi_SF" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecomi"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_comi_SF_load" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecomi"))))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_sse_mul_SF" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemul"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_mul_SF_load" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,p2+p0")
+
+;; FIXME: ssediv doesn't close p0 for 17 cycles, surely???
+(define_insn_reservation "ppro_sse_div_SF" 18
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,p0*17")
+
+(define_insn_reservation "ppro_sse_div_SF_load" 18
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,(p2+p0),p0*16")
+
+(define_insn_reservation "ppro_sse_icvt_SF" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseicvt")))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_icvt_SI" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "sseicvt")))
+ "decoder0,(p2+p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p0|p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p2+(p0|p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF_store" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p4+p3")
+
+(define_insn_reservation "ppro_sse_V4SF" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sse")))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_add_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_add_V4SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_cmp_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_cmp_V4SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_cvt_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecvt"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "!none,unknown")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1,p4+p3")
+
+(define_insn_reservation "ppro_sse_mul_V4SF" 5
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_sse_mul_V4SF_load" 5
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,(p2+p0)*2")
+
+;; FIXME: p0 really closed this long???
+(define_insn_reservation "ppro_sse_div_V4SF" 48
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,p0*34")
+
+(define_insn_reservation "ppro_sse_div_V4SF_load" 48
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,(p2+p0)*2,p0*32")
+
+(define_insn_reservation "ppro_sse_log_V4SF" 2
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sselog,sselog1"))))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_sse_log_V4SF_load" 2
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sselog,sselog1"))))
+ "decoder0,(p2+p1)")
+
+(define_insn_reservation "ppro_sse_mov_V4SF" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p0|p1)*2")
+
+(define_insn_reservation "ppro_sse_mov_V4SF_load" 2
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p2*2")
+
+(define_insn_reservation "ppro_sse_mov_V4SF_store" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p4+p3)*2")
+
+;; All other instructions are modelled as simple instructions.
+;; We have already modelled all i387 floating point instructions, so all
+;; other instructions execute on either port 0 or port 1. This includes
+;; the ALU units, and the MMX units.
+;;
+;; reg-reg instructions produce 1 uop so they can be decoded on any of
+;; the three decoders.
+(define_insn_reservation "ppro_insn" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decodern,(p0|p1)")
+
+;; read-modify and register-memory instructions have two or three uops,
+;; so they have to be decoded on decoder0.
+(define_insn_reservation "ppro_insn_load" 3
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,p2+(p0|p1)")
+
+(define_insn_reservation "ppro_insn_store" 1
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,(p0|p1),p4+p3")
+
+;; read-modify-store instructions produce 4 uops so they have to be
+;; decoded on decoder0 as well.
+(define_insn_reservation "ppro_insn_both" 4
+ (and (eq_attr "cpu" "pentiumpro,generic32")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,p2+(p0|p1),p4+p3")
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/predicates.md b/gcc-4.2.1-5666.3/gcc/config/i386/predicates.md
new file mode 100644
index 000000000..f988d11e4
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/predicates.md
@@ -0,0 +1,1037 @@
+;; Predicate definitions for IA-32 and x86-64.
+;; Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;; Return nonzero if OP is either an i387 or an SSE fp register.
+(define_predicate "any_fp_register_operand"
+ (and (match_code "reg")
+ (match_test "ANY_FP_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is an i387 fp register.
+(define_predicate "fp_register_operand"
+ (and (match_code "reg")
+ (match_test "FP_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is a non-fp register_operand.
+(define_predicate "register_and_not_any_fp_reg_operand"
+ (and (match_code "reg")
+ (not (match_test "ANY_FP_REGNO_P (REGNO (op))"))))
+
+;; Return nonzero if OP is a register operand other than an i387 fp register.
+(define_predicate "register_and_not_fp_reg_operand"
+ (and (match_code "reg")
+ (not (match_test "FP_REGNO_P (REGNO (op))"))))
+
+;; True if the operand is an MMX register.
+(define_predicate "mmx_reg_operand"
+ (and (match_code "reg")
+ (match_test "MMX_REGNO_P (REGNO (op))")))
+
+;; True if the operand is a Q_REGS class register.
+(define_predicate "q_regs_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return ANY_QI_REG_P (op);
+})
+
+;; Return true if op is a NON_Q_REGS class register.
+(define_predicate "non_q_regs_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return NON_QI_REG_P (op);
+})
+
+;; Match an SI or HImode register for a zero_extract.
+(define_special_predicate "ext_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if ((!TARGET_64BIT || GET_MODE (op) != DImode)
+ && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
+ return 0;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* Be careful to accept only registers having upper parts. */
+ return REGNO (op) > LAST_VIRTUAL_REGISTER || REGNO (op) < 4;
+})
+
+;; Return true if op is the AX register.
+(define_predicate "ax_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == 0")))
+
+;; Return true if op is the flags register.
+(define_predicate "flags_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == FLAGS_REG")))
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; Return true if op is not xmm0 register.
+(define_predicate "reg_not_xmm0_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) != FIRST_SSE_REG")))
+
+;; As above, but allow nonimmediate operands.
+(define_predicate "nonimm_not_xmm0_operand"
+ (and (match_operand 0 "nonimmediate_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) != FIRST_SSE_REG")))
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Return 1 if VALUE can be stored in a sign extended immediate field.
+(define_predicate "x86_64_immediate_operand"
+ (match_code "const_int,symbol_ref,label_ref,const")
+{
+ if (!TARGET_64BIT)
+ return immediate_operand (op, mode);
+
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+ /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
+ to be at least 32, and thus all acceptable constants are
+ represented as CONST_INT. */
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return 1;
+ else
+ {
+ HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (op), DImode);
+ return trunc_int_for_mode (val, SImode) == val;
+ }
+ break;
+
+ case SYMBOL_REF:
+ /* For certain code models, the symbolic references are known to fit:
+ in the CM_SMALL_PIC model we know it fits if it is local to the shared
+ library. Don't count TLS SYMBOL_REFs here, since they should fit
+ only when inside of an UNSPEC handled below. */
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+ return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
+ || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op)));
+
+ case LABEL_REF:
+ /* For certain code models, the code is near as well. */
+ return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
+ || ix86_cmodel == CM_KERNEL);
+
+ case CONST:
+ /* We also may accept the offsetted memory references in certain
+ special cases. */
+ if (GET_CODE (XEXP (op, 0)) == UNSPEC)
+ switch (XINT (XEXP (op, 0), 1))
+ {
+ case UNSPEC_GOTPCREL:
+ case UNSPEC_DTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_NTPOFF:
+ return 1;
+ default:
+ break;
+ }
+
+ if (GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx op1 = XEXP (XEXP (op, 0), 0);
+ rtx op2 = XEXP (XEXP (op, 0), 1);
+ HOST_WIDE_INT offset;
+
+ if (ix86_cmodel == CM_LARGE)
+ return 0;
+ if (GET_CODE (op2) != CONST_INT)
+ return 0;
+ offset = trunc_int_for_mode (INTVAL (op2), DImode);
+ switch (GET_CODE (op1))
+ {
+ case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op1))
+ return 0;
+ /* For CM_SMALL assume that the last object is 16MB below the
+ end of the 31-bit boundary. We may also accept fairly
+ large negative constants, knowing that all objects are
+ in the positive half of the address space. */
+ if ((ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op1)))
+ && offset < 16*1024*1024
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ /* For CM_KERNEL we know that all objects reside in the
+ negative half of the 32-bit address space. We must not
+ accept negative offsets, since they may fall just outside
+ that range, but we may accept fairly large positive ones. */
+ if (ix86_cmodel == CM_KERNEL
+ && offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ break;
+
+ case LABEL_REF:
+ /* These conditions are similar to SYMBOL_REF ones, just the
+ constraints for code models differ. */
+ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
+ && offset < 16*1024*1024
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ if (ix86_cmodel == CM_KERNEL
+ && offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ break;
+
+ case UNSPEC:
+ switch (XINT (op1, 1))
+ {
+ case UNSPEC_DTPOFF:
+ case UNSPEC_NTPOFF:
+ if (offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return 0;
+})
+
+;; Return 1 if VALUE can be stored in the zero extended immediate field.
+(define_predicate "x86_64_zext_immediate_operand"
+ (match_code "const_double,const_int,symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST_DOUBLE:
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return (GET_MODE (op) == VOIDmode && !CONST_DOUBLE_HIGH (op));
+ else
+ return 0;
+
+ case CONST_INT:
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return INTVAL (op) >= 0;
+ else
+ return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff);
+
+ case SYMBOL_REF:
+ /* For certain code models, the symbolic references are known to fit. */
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+ return (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op)));
+
+ case LABEL_REF:
+ /* For certain code models, the code is near as well. */
+ return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
+
+ case CONST:
+ /* We also may accept the offsetted memory references in certain
+ special cases. */
+ if (GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx op1 = XEXP (XEXP (op, 0), 0);
+ rtx op2 = XEXP (XEXP (op, 0), 1);
+
+ if (ix86_cmodel == CM_LARGE)
+ return 0;
+ switch (GET_CODE (op1))
+ {
+ case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op1))
+ return 0;
+ /* For the small code model we may accept fairly large positive
+ offsets, since one bit is available for free. Negative
+ offsets are limited by the size of the NULL-pointer area
+ specified by the ABI. */
+ if ((ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op1)))
+ && GET_CODE (op2) == CONST_INT
+ && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
+ && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2))
+ return 1;
+ /* ??? For the kernel, we may accept adjustment of
+ -0x10000000, since we know that it will just convert
+ negative address space to positive, but perhaps this
+ is not worthwhile. */
+ break;
+
+ case LABEL_REF:
+ /* These conditions are similar to SYMBOL_REF ones, just the
+ constraints for code models differ. */
+ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
+ && GET_CODE (op2) == CONST_INT
+ && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
+ && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2))
+ return 1;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return 0;
+})
+
+;; Return nonzero if OP is general operand representable on x86_64.
+(define_predicate "x86_64_general_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "x86_64_immediate_operand"))
+ (match_operand 0 "general_operand")))
+
+;; Return nonzero if OP is general operand representable on x86_64
+;; as either sign extended or zero extended constant.
+(define_predicate "x86_64_szext_general_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "nonimmediate_operand")
+ (ior (match_operand 0 "x86_64_immediate_operand")
+ (match_operand 0 "x86_64_zext_immediate_operand")))
+ (match_operand 0 "general_operand")))
+
+;; Return nonzero if OP is nonmemory operand representable on x86_64.
+(define_predicate "x86_64_nonmemory_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "x86_64_immediate_operand"))
+ (match_operand 0 "nonmemory_operand")))
+
+;; Return nonzero if OP is nonmemory operand representable on x86_64.
+(define_predicate "x86_64_szext_nonmemory_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "register_operand")
+ (ior (match_operand 0 "x86_64_immediate_operand")
+ (match_operand 0 "x86_64_zext_immediate_operand")))
+ (match_operand 0 "nonmemory_operand")))
+
+;; Return true when the operand is a PIC expression that can be computed
+;; by the lea operation.
+(define_predicate "pic_32bit_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ if (!flag_pic)
+ return 0;
+ /* Rule out relocations that translate into 64bit constants. */
+ if (TARGET_64BIT && GET_CODE (op) == CONST)
+ {
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == UNSPEC
+ && (XINT (op, 1) == UNSPEC_GOTOFF
+ || XINT (op, 1) == UNSPEC_GOT))
+ return 0;
+ }
+ return symbolic_operand (op, mode);
+})
+
+
+;; Return nonzero if OP is nonmemory operand acceptable by movabs patterns.
+(define_predicate "x86_64_movabs_operand"
+ (if_then_else (match_test "!TARGET_64BIT || !flag_pic")
+ (match_operand 0 "nonmemory_operand")
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "const_double_operand")
+ (match_test "GET_MODE_SIZE (mode) <= 8")))))
+
+;; Returns nonzero if OP is either a symbol reference or a sum of a symbol
+;; reference and a constant.
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF
+ || (GET_CODE (op) == UNSPEC
+ && (XINT (op, 1) == UNSPEC_GOT
+ || XINT (op, 1) == UNSPEC_GOTOFF
+ || XINT (op, 1) == UNSPEC_GOTPCREL)))
+ return 1;
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return 0;
+
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return 1;
+ /* Only @GOTOFF gets offsets. */
+ if (GET_CODE (op) != UNSPEC
+ || XINT (op, 1) != UNSPEC_GOTOFF)
+ return 0;
+
+ op = XVECEXP (op, 0, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return 1;
+ return 0;
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Return true if the operand contains a @GOT or @GOTOFF reference.
+(define_predicate "pic_symbolic_operand"
+ (match_code "const")
+{
+ op = XEXP (op, 0);
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_GOTPCREL)
+ return 1;
+ if (GET_CODE (op) == PLUS
+ && GET_CODE (XEXP (op, 0)) == UNSPEC
+ && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
+ return 1;
+ }
+ else
+ {
+ if (GET_CODE (op) == UNSPEC)
+ return 1;
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == UNSPEC)
+ return 1;
+ }
+ return 0;
+})
+
+;; Return true if OP is a symbolic operand that resolves locally.
+(define_predicate "local_symbolic_operand"
+ (match_code "const,label_ref,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ if (SYMBOL_REF_TLS_MODEL (op) != 0)
+ return 0;
+
+/* APPLE LOCAL begin fix-and-continue 6358507 */
+ if (SYMBOL_REF_LOCAL_P (op))
+ {
+#if TARGET_MACHO
+ if (!indirect_data (op)
+ || machopic_data_defined_p (op))
+#endif
+ return 1;
+ }
+/* APPLE LOCAL end fix-and-continue 6358507 */
+
+ /* There is, however, a not insubstantial body of code in the rest of
+ the compiler that assumes it can just stick the results of
+ ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
+ /* ??? This is a hack. Should update the body of the compiler to
+ always create a DECL and invoke targetm.encode_section_info. */
+ if (strncmp (XSTR (op, 0), internal_label_prefix,
+ internal_label_prefix_len) == 0)
+ return 1;
+
+ return 0;
+})
+
+;; Test for various thread-local symbols.
+(define_predicate "tls_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
+
+(define_predicate "tls_modbase_operand"
+ (and (match_code "symbol_ref")
+ (match_test "op == ix86_tls_module_base ()")))
+
+(define_predicate "tp_or_register_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_TP"))))
+
+;; Test for a pc-relative call operand
+(define_predicate "constant_call_address_operand"
+ (ior (match_code "symbol_ref")
+ (match_operand 0 "local_symbolic_operand")))
+
+;; True for any non-virtual or eliminable register. Used in places where
+;; instantiation of such a register may cause the pattern to not be recognized.
+(define_predicate "register_no_elim_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return !(op == arg_pointer_rtx
+ || op == frame_pointer_rtx
+ || (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && REGNO (op) <= LAST_VIRTUAL_REGISTER));
+})
+
+;; Similarly, but include the stack pointer. This is used to prevent esp
+;; from being used as an index reg.
+(define_predicate "index_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (reload_in_progress || reload_completed)
+ return REG_OK_FOR_INDEX_STRICT_P (op);
+ else
+ return REG_OK_FOR_INDEX_NONSTRICT_P (op);
+})
+
+;; Return false if this is any eliminable register. Otherwise general_operand.
+(define_predicate "general_no_elim_operand"
+ (if_then_else (match_code "reg,subreg")
+ (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "general_operand")))
+
+;; Return false if this is any eliminable register. Otherwise
+;; register_operand or a constant.
+(define_predicate "nonmemory_no_elim_operand"
+ (ior (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "immediate_operand")))
+
+;; Test for a valid operand for a call instruction.
+(define_predicate "call_insn_operand"
+ (ior (match_operand 0 "constant_call_address_operand")
+ (ior (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "memory_operand"))))
+
+;; Similarly, but for tail calls, in which we cannot allow memory references.
+(define_predicate "sibcall_insn_operand"
+ (ior (match_operand 0 "constant_call_address_operand")
+ (match_operand 0 "register_no_elim_operand")))
+
+;; Match exactly zero.
+(define_predicate "const0_operand"
+ (match_code "const_int,const_double,const_vector")
+{
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+ return op == CONST0_RTX (mode);
+})
+
+;; Match exactly one.
+(define_predicate "const1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx")))
+
+;; Match exactly eight.
+(define_predicate "const8_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 8")))
+
+;; Match 2, 4, or 8. Used for leal multiplicands.
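+;; For example, this would accept the scale in an address such as
+;; (plus (mult (reg) (const_int 4)) (reg)), which lea can encode directly;
+;; a scale of 1 is normally represented without a mult at all.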
+(define_predicate "const248_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i = INTVAL (op);
+ return i == 2 || i == 4 || i == 8;
+})
+
+;; Match 0 or 1.
+(define_predicate "const_0_to_1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx || op == const1_rtx")))
+
+;; Match 0 to 3.
+(define_predicate "const_0_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3")))
+
+;; Match 0 to 7.
+(define_predicate "const_0_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7")))
+
+;; Match 0 to 15.
+(define_predicate "const_0_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
+
+;; Match 0 to 63.
+(define_predicate "const_0_to_63_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63")))
+
+;; Match 0 to 255.
+(define_predicate "const_0_to_255_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 255")))
+
+;; Match (0 to 255) * 8
+(define_predicate "const_0_to_255_mul_8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT val = INTVAL (op);
+ return val <= 255*8 && val % 8 == 0;
+})
+
+;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
+;; for shift & compare patterns, as shifting by 0 does not change flags).
+(define_predicate "const_1_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 31")))
+
+;; Match 2 or 3.
+(define_predicate "const_2_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2 || INTVAL (op) == 3")))
+
+;; Match 4 to 7.
+(define_predicate "const_4_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 4 && INTVAL (op) <= 7")))
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; Match exactly one bit in 2-bit mask.
+(define_predicate "const_pow2_1_to_2_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 1 || INTVAL (op) == 2")))
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; Match exactly one bit in 4-bit mask.
+(define_predicate "const_pow2_1_to_8_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 3;
+})
+
+;; Match exactly one bit in 8-bit mask.
+(define_predicate "const_pow2_1_to_128_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 7;
+})
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; Match exactly one bit in 16-bit mask.
+(define_predicate "const_pow2_1_to_32768_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 15;
+})
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; True if this is a constant appropriate for an increment or decrement.
+(define_predicate "incdec_operand"
+ (match_code "const_int")
+{
+ /* On the Pentium 4, the inc and dec operations cause an extra dependency
+ on the flags register, since the carry flag is not set. */
+ if (!TARGET_USE_INCDEC && !optimize_size)
+ return 0;
+ return op == const1_rtx || op == constm1_rtx;
+})
+
+;; True for registers, or 1 or -1. Used to optimize double-word shifts.
+(define_predicate "reg_or_pm1_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx || op == constm1_rtx"))))
+
+;; True if OP is acceptable as operand of DImode shift expander.
+(define_predicate "shiftdi_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "ashldi_input_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "reg_or_pm1_operand")))
+
+;; Return true if OP is a vector load from the constant pool with just
+;; the first element nonzero.
+(define_predicate "zero_extended_scalar_load_operand"
+ (match_code "mem")
+{
+ unsigned n_elts;
+ op = maybe_get_pool_constant (op);
+ if (!op)
+ return 0;
+ if (GET_CODE (op) != CONST_VECTOR)
+ return 0;
+ n_elts =
+ (GET_MODE_SIZE (GET_MODE (op)) /
+ GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
+ for (n_elts--; n_elts > 0; n_elts--)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, n_elts);
+ if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+ return 0;
+ }
+ return 1;
+})
+
+;; Return true if the operand is a vector constant that is all ones.
+(define_predicate "vector_all_ones_operand"
+ (match_code "const_vector")
+{
+ int nunits = GET_MODE_NUNITS (mode);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && CONST_VECTOR_NUNITS (op) == nunits)
+ {
+ int i;
+ for (i = 0; i < nunits; ++i)
+ {
+ rtx x = CONST_VECTOR_ELT (op, i);
+ if (x != constm1_rtx)
+ return 0;
+ }
+ return 1;
+ }
+
+ return 0;
+})
+
+;; Return 1 when OP is an operand acceptable for a standard SSE move.
+(define_predicate "vector_move_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return 1 when OP is nonimmediate or standard SSE constant.
+(define_predicate "nonimmediate_or_sse_const_operand"
+ (match_operand 0 "general_operand")
+{
+ if (nonimmediate_operand (op, mode))
+ return 1;
+ if (standard_sse_constant_p (op) > 0)
+ return 1;
+ return 0;
+})
+
+;; APPLE LOCAL begin mainline
+/* MERGE FIXME: was this replaced by reg_or_0_operand below? */
+;; Return true if OP is a nonimmediate or a zero.
+(define_predicate "nonimmediate_or_0_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")))
+;; APPLE LOCAL end mainline
+
+;; Return true if OP is a register or a zero.
+(define_predicate "reg_or_0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return true if op is a valid address that does not contain
+;; a segment override.
+(define_special_predicate "no_seg_address_operand"
+ (match_operand 0 "address_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (op, &parts);
+ gcc_assert (ok);
+ return parts.seg == SEG_DEFAULT;
+})
+
+;; Return nonzero if the rtx is known to be at least 32 bits aligned.
+(define_predicate "aligned_operand"
+ (match_operand 0 "general_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ /* Registers and immediate operands are always "aligned". */
+ if (GET_CODE (op) != MEM)
+ return 1;
+
+ /* All patterns using aligned_operand on memory operands end up
+ promoting the memory operand to 64 bits and thus causing a memory mismatch. */
+ if (TARGET_MEMORY_MISMATCH_STALL && !optimize_size)
+ return 0;
+
+ /* Don't even try to do any aligned optimizations with volatiles. */
+ if (MEM_VOLATILE_P (op))
+ return 0;
+
+ if (MEM_ALIGN (op) >= 32)
+ return 1;
+
+ op = XEXP (op, 0);
+
+ /* Pushes and pops are only valid on the stack pointer. */
+ if (GET_CODE (op) == PRE_DEC
+ || GET_CODE (op) == POST_INC)
+ return 1;
+
+ /* Decode the address. */
+ ok = ix86_decompose_address (op, &parts);
+ gcc_assert (ok);
+
+ /* Look for some component that isn't known to be aligned. */
+ if (parts.index)
+ {
+ if (REGNO_POINTER_ALIGN (REGNO (parts.index)) * parts.scale < 32)
+ return 0;
+ }
+ if (parts.base)
+ {
+ if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
+ return 0;
+ }
+ if (parts.disp)
+ {
+ if (GET_CODE (parts.disp) != CONST_INT
+ || (INTVAL (parts.disp) & 3) != 0)
+ return 0;
+ }
+
+ /* Didn't find one -- this must be an aligned address. */
+ return 1;
+})
+
+;; Returns 1 if OP is memory operand with a displacement.
+(define_predicate "memory_displacement_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+ return parts.disp != NULL_RTX;
+})
+
+;; Returns 1 if OP is memory operand with a displacement only.
+(define_predicate "memory_displacement_only_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+
+ if (parts.base || parts.index)
+ return 0;
+
+ return parts.disp != NULL_RTX;
+})
+
+;; Returns 1 if OP is memory operand that cannot be represented
+;; by the modRM array.
+(define_predicate "long_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "memory_address_length (op) != 0")))
+
+;; Return 1 if OP is a comparison operator that can be issued by fcmov.
+(define_predicate "fcmov_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ if (bypass_code != UNKNOWN || second_code != UNKNOWN)
+ return 0;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+ /* The i387 supports only a limited set of condition codes. */
+ switch (code)
+ {
+ case LTU: case GTU: case LEU: case GEU:
+ if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
+ return 1;
+ return 0;
+ case ORDERED: case UNORDERED:
+ case EQ: case NE:
+ return 1;
+ default:
+ return 0;
+ }
+})
+
+;; Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns.
+;; The first set are supported directly; the second set can't be done with
+;; full IEEE support, i.e. NaNs.
+;;
+;; ??? It would seem that we have a lot of uses of this predicate that pass
+;; it the wrong mode. We got away with this because the old function didn't
+;; check the mode at all. Mirror that for now by calling this a special
+;; predicate.
+
+(define_special_predicate "sse_comparison_operator"
+ (match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
+
+;; Return 1 if OP is a valid comparison operator in valid mode.
+(define_predicate "ix86_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ return (bypass_code == UNKNOWN && second_code == UNKNOWN);
+ }
+ switch (code)
+ {
+ case EQ: case NE:
+ return 1;
+ case LT: case GE:
+ if (inmode == CCmode || inmode == CCGCmode
+ || inmode == CCGOCmode || inmode == CCNOmode)
+ return 1;
+ return 0;
+ case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
+ if (inmode == CCmode)
+ return 1;
+ return 0;
+ case GT: case LE:
+ if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
+ return 1;
+ return 0;
+ default:
+ return 0;
+ }
+})
+
+;; Return 1 if OP is a valid comparison operator testing carry flag to be set.
+(define_predicate "ix86_carry_flag_operator"
+ (match_code "ltu,lt,unlt,gt,ungt,le,unle,ge,unge,ltgt,uneq")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != FLAGS_REG
+ || XEXP (op, 1) != const0_rtx)
+ return 0;
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ if (bypass_code != UNKNOWN || second_code != UNKNOWN)
+ return 0;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+ else if (inmode != CCmode)
+ return 0;
+
+ return code == LTU;
+})
+
+;; Nearly general operand, but accept any const_double, since we wish
+;; to be able to drop them into memory rather than have them get pulled
+;; into registers.
+(define_predicate "cmp_fp_expander_operand"
+ (ior (match_code "const_double")
+ (match_operand 0 "general_operand")))
+
+;; Return true if this is a valid binary floating-point operation.
+(define_predicate "binary_fp_operator"
+ (match_code "plus,minus,mult,div"))
+
+;; Return true if this is a multiply operation.
+(define_predicate "mult_operator"
+ (match_code "mult"))
+
+;; Return true if this is a division operation.
+(define_predicate "div_operator"
+ (match_code "div"))
+
+;; Return true if this is a float extend operation.
+(define_predicate "float_operator"
+ (match_code "float"))
+
+;; Return true for ARITHMETIC_P.
+(define_predicate "arith_or_logical_operator"
+ (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax,compare,minus,div,
+ mod,udiv,umod,ashift,rotate,ashiftrt,lshiftrt,rotatert"))
+
+;; Return 1 if OP is a binary operator that can be promoted to wider mode.
+;; Modern CPUs have the same latency for HImode and SImode multiply,
+;; but the 386 and 486 do HImode multiply faster.
+(define_predicate "promotable_binary_operator"
+ (ior (match_code "plus,and,ior,xor,ashift")
+ (and (match_code "mult")
+ (match_test "ix86_tune > PROCESSOR_I486"))))
+
+;; To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
+;; re-recognize the operand to avoid a copy_to_mode_reg that will fail.
+;;
+;; ??? It seems likely that this will only work because cmpsi is an
+;; expander, and no actual insns use this.
+
+(define_predicate "cmpsi_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (and (match_code "and")
+ (match_code "zero_extract" "0")
+ (match_code "const_int" "1")
+ (match_code "const_int" "01")
+ (match_code "const_int" "02")
+ (match_test "INTVAL (XEXP (XEXP (op, 0), 1)) == 8")
+ (match_test "INTVAL (XEXP (XEXP (op, 0), 2)) == 8")
+ )))
+
+(define_predicate "compare_operator"
+ (match_code "compare"))
+
+(define_predicate "absneg_operator"
+ (match_code "abs,neg"))
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h
new file mode 100644
index 000000000..2da9a7460
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/smmintrin.h
@@ -0,0 +1,836 @@
+/* APPLE LOCAL file 5612787 mainline sse4 */
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _SMMINTRIN_H_INCLUDED
+#define _SMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4_1__
+# error "SSE4.1 instruction set not enabled"
+#else
+
+/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
+ files. */
+#include <tmmintrin.h>
+
+/* SSE4.1 */
+
+/* Rounding mode macros. */
+#define _MM_FROUND_TO_NEAREST_INT 0x00
+#define _MM_FROUND_TO_NEG_INF 0x01
+#define _MM_FROUND_TO_POS_INF 0x02
+#define _MM_FROUND_TO_ZERO 0x03
+#define _MM_FROUND_CUR_DIRECTION 0x04
+
+#define _MM_FROUND_RAISE_EXC 0x00
+#define _MM_FROUND_NO_EXC 0x08
+
+#define _MM_FROUND_NINT \
+ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_FLOOR \
+ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_CEIL \
+ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_TRUNC \
+ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_RINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_NEARBYINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
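+
+/* Illustrative sketch: the composite macros above simply OR a rounding
+ direction with an exception-control bit, e.g. _MM_FROUND_FLOOR is
+ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) == 0x01, so a call such
+ as _mm_round_ps (__v, _MM_FROUND_FLOOR) on a hypothetical vector __v
+ rounds every element toward negative infinity, e.g. 1.5 -> 1.0 and
+ -1.5 -> -2.0. */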
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+/* Integer blend instructions - select data from 2 sources using
+ constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
+ (__v8hi)__Y,
+ __M);
+}
+#else
+#define _mm_blend_epi16(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M)))
+#endif
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
+{
+ return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ (__v16qi)__M);
+}
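+
+/* Illustrative sketch of the two forms above: in the constant-mask form,
+ bit i of the mask selects word i from the second operand, so
+
+     _mm_blend_epi16 (__x, __y, 0x0f)
+
+ (with hypothetical vectors __x and __y) takes the low four words from
+ __y and the high four from __x; the variable form instead tests the
+ most significant bit of each mask byte. */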
+
+/* Single precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
+{
+ return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
+ (__v4sf)__Y,
+ __M);
+}
+#else
+#define _mm_blend_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M)))
+#endif
+
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
+{
+ return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
+ (__v4sf)__Y,
+ (__v4sf)__M);
+}
+
+/* Double precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
+{
+ return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
+ (__v2df)__Y,
+ __M);
+}
+#else
+#define _mm_blend_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M)))
+#endif
+
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
+{
+ return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
+ (__v2df)__Y,
+ (__v2df)__M);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
+{
+ return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
+ (__v4sf)__Y,
+ __M);
+}
+
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
+{
+ return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
+ (__v2df)__Y,
+ __M);
+}
+#else
+#define _mm_dp_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_dpps ((__v4sf)(X), (__v4sf)(Y), (M)))
+
+#define _mm_dp_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_dppd ((__v2df)(X), (__v2df)(Y), (M)))
+#endif
+
+/* Packed integer 64-bit comparison, zeroing or filling with ones the
+ corresponding parts of the result. */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Min/max packed integer instructions. */
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_min_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_max_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_min_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_max_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_min_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_max_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_min_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_max_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication with truncation of upper
+ halves of results. */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_mullo_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication of 2 pairs of operands
+ with two 64-bit results. */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_mul_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
+}
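+
+/* Illustrative contrast between the two multiplies above: _mm_mullo_epi32
+ multiplies all four corresponding 32-bit elements and keeps the low
+ 32 bits of each product, while _mm_mul_epi32 multiplies only the
+ elements at indices 0 and 2 of each operand and returns the two full
+ signed 64-bit products. */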
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) == 0. */
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_testz_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & ~__M) == 0. */
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_testc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) != 0 && (__V & ~__M) != 0. */
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_testnzc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Macros for packed integer 128-bit comparison intrinsics. */
+#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
+
+#define _mm_test_all_ones(V) \
+ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
+
+#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
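+
+/* Illustrative sketch: since (__V & __V) == 0 exactly when __V is zero,
+
+     _mm_test_all_zeros (__v, __v)
+
+ returns 1 precisely when every bit of a hypothetical vector __v is
+ clear, and _mm_test_all_ones (__v) returns 1 when every bit is set. */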
+
+/* Insert a single precision float into the packed single precision array
+ element selected by index N. Bits [7:6] of N select the S
+ element, bits [5:4] select the D element, and bits [3:0] give the
+ zeroing mask for D. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
+{
+ return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
+ (__v4sf)__S,
+ __N);
+}
+#else
+#define _mm_insert_ps(D, S, N) \
+ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(D), (__v4sf)(S), (N)))
+#endif
+
+/* Helper macro to create the N value for _mm_insert_ps. */
+#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
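+
+/* Illustrative sketch: _MM_MK_INSERTPS_NDX (2, 0, 0) evaluates to 0x80,
+ which asks _mm_insert_ps to copy element 2 of S into element 0 of D
+ without zeroing any destination elements. */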
+
+/* Extract binary representation of single precision float from packed
+ single precision array element of X selected by index N. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_extract_ps (__m128 __X, const int __N)
+{
+ union { int i; float f; } __tmp;
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
+ return __tmp.i;
+}
+#else
+#define _mm_extract_ps(X, N) \
+ (__extension__ \
+ ({ \
+ union { int i; float f; } __tmp; \
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(X), (N)); \
+ __tmp.i; \
+ }) \
+ )
+#endif
+
+/* Extract binary representation of single precision float into
+ D from packed single precision array element of S selected
+ by index N. */
+#define _MM_EXTRACT_FLOAT(D, S, N) \
+ { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
+
+/* Extract specified single precision float element into the lower
+ part of __m128. */
+#define _MM_PICK_OUT_PS(X, N) \
+ _mm_insert_ps (_mm_setzero_ps (), (X), \
+ _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
+
+/* Insert integer, S, into packed integer array element of D
+ selected by index N. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_insert_epi8 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
+ __S, __N);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_insert_epi32 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
+ __S, __N);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
+ __S, __N);
+}
+#endif
+#else
+#define _mm_insert_epi8(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(D), (S), (N)))
+
+#define _mm_insert_epi32(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(D), (S), (N)))
+
+#ifdef __x86_64__
+#define _mm_insert_epi64(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(D), (S), (N)))
+#endif
+#endif
+
+/* Extract integer from packed integer array element of X selected by
+ index N. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_extract_epi8 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_extract_epi32 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE long long __attribute__((__always_inline__))
+_mm_extract_epi64 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
+}
+#endif
+#else
+#define _mm_extract_epi8(X, N) \
+ __builtin_ia32_vec_ext_v16qi ((__v16qi) X, (N))
+#define _mm_extract_epi32(X, N) \
+ __builtin_ia32_vec_ext_v4si ((__v4si) X, (N))
+
+#ifdef __x86_64__
+#define _mm_extract_epi64(X, N) \
+ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(X), (N)))
+#endif
+#endif
+
+/* Return horizontal packed word minimum and its index in bits [15:0]
+ and bits [18:16] respectively. */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_minpos_epu16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
+}
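+
+/* Illustrative sketch: if __X held the words {7, 3, 9, 12, 8, 6, 5, 4}
+ (values chosen for illustration), the result would carry the minimum,
+ 3, in bits [15:0] and its index, 1, in bits [18:16], with all
+ remaining bits zeroed. */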
+
+/* Packed/scalar double precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_round_pd (__m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
+}
+
+__STATIC_INLINE __m128d __attribute__((__always_inline__))
+_mm_round_sd(__m128d __D, __m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
+ (__v2df)__V,
+ __M);
+}
+#else
+#define _mm_round_pd(V, M) \
+ ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M)))
+
+#define _mm_round_sd(D, V, M) \
+ ((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M)))
+#endif
+
+/* Packed/scalar single precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_round_ps (__m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
+}
+
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+_mm_round_ss (__m128 __D, __m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
+ (__v4sf)__V,
+ __M);
+}
+#else
+#define _mm_round_ps(V, M) \
+ ((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M)))
+
+#define _mm_round_ss(D, V, M) \
+ ((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M)))
+#endif
+
+/* Macros for ceil/floor intrinsics. */
+#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
+
+#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
+
+/* Packed integer sign-extension. */
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepi8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
+}
+
+/* Packed integer zero-extension. */
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cvtepu8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
+}
+
+/* Pack 8 double words from 2 operands into 8 words of result with
+ unsigned saturation. */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_packus_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
+}
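+
+/* Illustrative sketch: each signed 32-bit input is clamped to the
+ unsigned 16-bit range, so e.g. 70000 becomes 0xffff, -5 becomes 0,
+ and 1234 is kept as-is; __X supplies the low four result words and
+ __Y the high four. */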
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+ byte integers in the first 2 operands. Starting offsets within
+ operands are determined by the 3rd mask operand. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
+ (__v16qi)__Y, __M);
+}
+#else
+#define _mm_mpsadbw_epu8(X, Y, M) \
+ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(X), (__v16qi)(Y), (M)))
+#endif
+
+/* Load double quadword using non-temporal aligned hint. */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_stream_load_si128 (__m128i *__X)
+{
+ return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
+}
+
+#ifdef __SSE4_2__
+
+/* These macros specify the source data format. */
+#define SIDD_UBYTE_OPS 0x00
+#define SIDD_UWORD_OPS 0x01
+#define SIDD_SBYTE_OPS 0x02
+#define SIDD_SWORD_OPS 0x03
+
+/* These macros specify the comparison operation. */
+#define SIDD_CMP_EQUAL_ANY 0x00
+#define SIDD_CMP_RANGES 0x04
+#define SIDD_CMP_EQUAL_EACH 0x08
+#define SIDD_CMP_EQUAL_ORDERED 0x0c
+
+/* These macros specify the polarity. */
+#define SIDD_POSITIVE_POLARITY 0x00
+#define SIDD_NEGATIVE_POLARITY 0x10
+#define SIDD_MASKED_POSITIVE_POLARITY 0x20
+#define SIDD_MASKED_NEGATIVE_POLARITY 0x30
+
+/* These macros specify the output selection in _mm_cmpXstri (). */
+#define SIDD_LEAST_SIGNIFICANT 0x00
+#define SIDD_MOST_SIGNIFICANT 0x40
+
+/* These macros specify the output selection in _mm_cmpXstrm (). */
+#define SIDD_BIT_MASK 0x00
+#define SIDD_UNIT_MASK 0x40
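+
+/* Illustrative sketch: an imm8 for the string intrinsics below is just
+ the OR of one choice from each group above, e.g.
+ (SIDD_UWORD_OPS | SIDD_CMP_RANGES | SIDD_MOST_SIGNIFICANT) == 0x45
+ selects unsigned word elements, range comparison, and the index of the
+ most significant matching element. Roughly speaking, a combination such
+ as (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_ANY | SIDD_LEAST_SIGNIFICANT) gives
+ a strpbrk-style search, subject to the implicit-length termination rules
+ of the *istr* forms. */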
+
+/* Intrinsics for text/string processing. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistrm(X, Y, M) \
+ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M)))
+#define _mm_cmpistri(X, Y, M) \
+ __builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M))
+
+#define _mm_cmpestrm(X, LX, Y, LY, M) \
+ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M)))
+#define _mm_cmpestri(X, LX, Y, LY, M) \
+ __builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M))
+#endif
+
+/* Intrinsics for text/string processing and reading values of
+ EFlags. */
+
+#ifdef __OPTIMIZE__
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistra(X, Y, M) \
+ __builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M))
+#define _mm_cmpistrc(X, Y, M) \
+ __builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M))
+#define _mm_cmpistro(X, Y, M) \
+ __builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M))
+#define _mm_cmpistrs(X, Y, M) \
+ __builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M))
+#define _mm_cmpistrz(X, Y, M) \
+ __builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M))
+
+#define _mm_cmpestra(X, LX, Y, LY, M) \
+ __builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpestrc(X, LX, Y, LY, M) \
+ __builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpestro(X, LX, Y, LY, M) \
+ __builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpestrs(X, LX, Y, LY, M) \
+ __builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M))
+#define _mm_cmpestrz(X, LX, Y, LY, M) \
+ __builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \
+ (__v16qi)(Y), (int)(LY), (M))
+#endif
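+
+/* Usage sketch (illustrative only): the EFlags forms pair naturally with the
+   index forms when scanning a string sixteen bytes at a time.  With the same
+   EQUAL_ANY mode constant as in the sketch above (abbreviated MODE here),
+   _mm_cmpistrc reads the carry flag ("some byte of HAY matched") and
+   _mm_cmpistrz reads the zero flag ("HAY contains a terminating NUL"):
+
+     if (_mm_cmpistrc (needle, hay, MODE))
+       idx = _mm_cmpistri (needle, hay, MODE);
+     else if (_mm_cmpistrz (needle, hay, MODE))
+       break;
+
+   The flag readers take the same operands and mode argument as the index
+   and mask forms.  */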
+
+/* Packed 64-bit integer comparison: each element of the result is set to
+   all ones where the comparison is true and to all zeros where it is
+   false.  */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Calculate the number of bits set to 1. */
+__STATIC_INLINE int __attribute__((__always_inline__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE long long __attribute__((__always_inline__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+#endif
+
+/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+_mm_crc32_u8 (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+_mm_crc32_u16 (unsigned int __C, unsigned short __V)
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+_mm_crc32_u32 (unsigned int __C, unsigned int __V)
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+
+#ifdef __x86_64__
+__STATIC_INLINE unsigned long long __attribute__((__always_inline__))
+_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+#endif
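+
+/* Usage sketch (illustrative only): each intrinsic folds one value into a
+   running CRC-32C accumulator.  The all-ones seed and the final inversion
+   below follow the common CRC-32C convention; they are the caller's choice,
+   not something the intrinsics impose:
+
+     unsigned int crc = 0xffffffff;
+     for (i = 0; i < len; i++)
+       crc = _mm_crc32_u8 (crc, buf[i]);
+     crc = ~crc;
+
+   The wider forms above fold 2, 4 or (on x86_64) 8 bytes per step in the
+   same way.  */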
+
+#endif /* __SSE4_2__ */
+
+#endif /* __SSE4_1__ */
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/sse.md b/gcc-4.2.1-5666.3/gcc/config/i386/sse.md
new file mode 100644
index 000000000..40318d83a
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/sse.md
@@ -0,0 +1,6218 @@
+;; GCC machine description for SSE instructions
+;; Copyright (C) 2005, 2006
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+
+;; 16 byte integral modes handled by SSE, minus TImode, which gets
+;; special-cased for TARGET_64BIT.
+(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
+
+;; All 16-byte vector modes handled by SSE
+(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Mix-n-match
+(define_mode_macro SSEMODE12 [V16QI V8HI])
+(define_mode_macro SSEMODE24 [V8HI V4SI])
+(define_mode_macro SSEMODE14 [V16QI V4SI])
+(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
+(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
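+
+;; For example, an instruction template written once over SSEMODEI that
+;; emits "padd<ssevecsize>\t{%2, %0|%0, %2}" expands to paddb, paddw, paddd
+;; and paddq as the mode macro runs over V16QI, V8HI, V4SI and V2DI; the
+;; integer add patterns later in this file use exactly this mechanism.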
+
+;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; All of these patterns are enabled for SSE1 as well as SSE2.
+;; This is essential for maintaining stable calling conventions.
+
+(define_expand "mov<mode>"
+ [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (if_then_else
+ (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
+ (const_string "V4SF")
+ (const_string "TI")))])
+
+(define_expand "movv4sf"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (V4SFmode, operands);
+ DONE;
+})
+
+(define_insn "*movv4sf_internal"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
+ "TARGET_SSE"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ return "movaps\t{%1, %0|%0, %1}";
+ default:
+      gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "mode" "V4SF")])
+
+(define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+ operands[2] = CONST0_RTX (V4SFmode);
+})
+
+(define_expand "movv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (V2DFmode, operands);
+ DONE;
+})
+
+(define_insn "*movv2df_internal"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movapd\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (if_then_else
+ (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
+ (const_string "V4SF")
+ (const_string "V2DF")))])
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
+{
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+ operands[2] = CONST0_RTX (DFmode);
+})
+
+(define_expand "push<mode>1"
+ [(match_operand:SSEMODE 0 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "sse_movups"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movups\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movupd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movupd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movdqu"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse_movntv4sf"
+ [(set (match_operand:V4SF 0 "memory_operand" "=m")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE"
+ "movntps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_movntv2df"
+ [(set (match_operand:V2DF 0 "memory_operand" "=m")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movntpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movntv2di"
+ [(set (match_operand:V2DI 0 "memory_operand" "=m")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movntsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movnti\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+;; APPLE LOCAL begin 4099020, 4121692
+(define_insn "sse_loadqv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=rx")
+ (unspec:V4SI [(zero_extend:V4SI (match_operand:V2SI 1 "memory_operand" "m"))] UNSPEC_LDQ))]
+ "TARGET_SSE"
+ "movq\t{%1, %0|%0, %1}")
+
+;; APPLE LOCAL begin 4279065
+(define_insn "sse_storeqv4si"
+ [(set (match_operand:V2SI 0 "memory_operand" "=m")
+ (unspec:V2SI [(match_operand:V4SI 1 "register_operand" "x")] UNSPEC_STOQ))]
+ "TARGET_SSE"
+ "movq\t{%1, %0|%0, %1}")
+;; APPLE LOCAL end 4279065
+
+(define_insn "sse_movqv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(zero_extend:V4SI (subreg:V2SI
+ (match_operand:V4SI 1 "register_operand" "x") 0))] UNSPEC_MOVQ))]
+ "TARGET_SSE"
+ "movq\t{%1, %0|%0, %1}")
+;; APPLE LOCAL end 4099020, 4121692
+
+(define_insn "sse3_lddqu"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
+ UNSPEC_LDQQU))]
+ "TARGET_SSE3"
+ "lddqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "negv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
+
+(define_expand "absv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
+
+(define_expand "addv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
+
+(define_insn "*addv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+ "addps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmaddv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+ "addss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_expand "subv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
+
+(define_insn "*subv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "subps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "subss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_expand "mulv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
+
+(define_insn "*mulv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
+ "mulps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmmulv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
+ "mulss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "SF")])
+
+(define_expand "divv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
+
+(define_insn "*divv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "divps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmdivv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "divss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_rcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_SSE"
+ "rcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "rcpss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_rsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_SSE"
+ "rsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "rsqrtss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+(define_insn "sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "sqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "sqrtss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+
+(define_expand "smaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (V4SFmode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
+})
+
+(define_insn "*smaxv4sf3_finite"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && flag_finite_math_only
+ && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
+ "maxps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*smaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "maxps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsmaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "maxss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+(define_expand "sminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (V4SFmode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
+})
+
+(define_insn "*sminv4sf3_finite"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && flag_finite_math_only
+ && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
+ "minps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*sminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "minps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "minss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
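+;;
+;; Concretely, whenever op1 < op2 is false (the operands are equal,
+;; including -0.0 versus +0.0, or one of them is a NaN) the second operand
+;; is returned, so min(-0.0, +0.0) is +0.0 and min(x, NaN) is NaN.
+;; Swapping the operands therefore changes the result, which is why these
+;; patterns wrap the operation in an UNSPEC instead of using SMIN/SMAX.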
+
+(define_insn "*ieee_sminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE"
+ "minps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*ieee_smaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE"
+ "maxps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*ieee_sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE2"
+ "minpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*ieee_smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE2"
+ "maxpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_addsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (minus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
+ "TARGET_SSE3"
+ "addsubps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_haddv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSE3"
+ "haddps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_hsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSE3"
+ "hsubps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "reduc_splus_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE3)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
+ }
+ else
+ ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
+ DONE;
+})
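+
+;; Data flow of the SSE3 branch above, as a sketch with x = [x0 x1 x2 x3]:
+;;   haddps x, x  gives  t = [x0+x1, x2+x3, x0+x1, x2+x3]
+;;   haddps t, t  gives  [s, s, s, s]  where  s = x0+x1+x2+x3
+;; so the horizontal sum ends up replicated into every element of operand 0.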
+
+(define_expand "reduc_smax_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smin_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_maskcmpv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (match_operator:V4SF 3 "sse_comparison_operator"
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE"
+ "cmp%D3ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmmaskcmpv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operator:V4SF 3 "sse_comparison_operator"
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")])
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "cmp%D3ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_comi"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (vec_select:SF
+ (match_operand:V4SF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE"
+ "comiss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_ucomi"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU
+ (vec_select:SF
+ (match_operand:V4SF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE"
+ "ucomiss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "mode" "SF")])
+
+(define_expand "vcondv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (if_then_else:V4SF
+ (match_operator 3 ""
+ [(match_operand:V4SF 4 "nonimmediate_operand" "")
+ (match_operand:V4SF 5 "nonimmediate_operand" "")])
+ (match_operand:V4SF 1 "general_operand" "")
+ (match_operand:V4SF 2 "general_operand" "")))]
+ "TARGET_SSE"
+{
+ if (ix86_expand_fp_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "andv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+
+(define_insn "*andv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+ "andps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_nandv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "iorv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+
+(define_insn "*iorv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
+ "orps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "xorv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+
+(define_insn "*xorv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
+ "xorps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; Also define scalar versions. These are used for abs, neg, and
+;; conditional move. Using subregs into vector modes causes register
+;; allocation lossage. These patterns do not allow memory operands
+;; because the native instructions read the full 128-bits.
+
+(define_insn "*andsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (and:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "andps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*nandsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*iorsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (ior:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "orps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*xorsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (xor:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "xorps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 3)))]
+ "TARGET_SSE"
+ "cvtpi2ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_cvtps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvtps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvttps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "cvtsi2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "cvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE"
+ "cvttss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvtdq2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_cvtps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_cvttps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhlps\t{%2, %0|%0, %2}
+ movlps\t{%H2, %0|%0, %H2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+; APPLE LOCAL begin radar 4099352
+(define_insn "sse_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0,0")
+ /* APPLE LOCAL mainline */
+ (match_operand:V4SF 2 "nonimmediate_or_0_operand" " C,x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ xorps\t%0, %0
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
+; APPLE LOCAL end radar 4099352
+
+(define_insn "sse_unpckhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE"
+ "unpckhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_unpcklps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE"
+ "unpcklps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions
+(define_insn "sse3_movshdup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1)
+ (const_int 1)
+ (const_int 7)
+ (const_int 7)])))]
+ "TARGET_SSE3"
+ "movshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_movsldup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 0)
+ (const_int 6)
+ (const_int 6)])))]
+ "TARGET_SSE3"
+ "movsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "sse_shufps"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
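+
+;; A worked example of the decomposition above: mask 0x1b (0b00011011)
+;; yields selectors 3 and 2 into operand 1 and, after the +4 bias, 5 and 4
+;; into operand 2's half of the vec_concat, giving the parallel [3 2 5 4];
+;; sse_shufps_1 below folds those selectors back into the 0x1b immediate.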
+
+(define_insn "sse_shufps_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_storehps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "@
+ shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+ movlps\t{%2, %0|%0, %2}
+ movlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_SSE"
+ "shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V4SF")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*sse_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*sse_concatv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " 0,0")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_SSE"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_initv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*vec_setv4sf_0"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ /* APPLE LOCAL mainline */
+ (match_operand:V4SF 1 "nonimmediate_or_0_operand" " 0,C,C ,0")
+ (const_int 1)))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+ "insertps\t{%3, %2, %0|%0, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_split
+ [(set (match_operand:V4SF 0 "memory_operand" "")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonmemory_operand" ""))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv4sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "register_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "extractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_expand "vec_extractv4sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "negv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
+
+(define_expand "absv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
+
+(define_expand "addv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
+
+(define_insn "*addv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
+ "addpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmaddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+ "addsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_expand "subv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
+
+(define_insn "*subv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "subpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "subsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_expand "mulv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
+
+(define_insn "*mulv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
+ "mulpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmmulv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
+ "mulsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "DF")])
+
+(define_expand "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
+
+(define_insn "*divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "divpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmdivv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "divsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "DF")])
+
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "sqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmsqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
+ (match_operand:V2DF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "sqrtsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "DF")])
+
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+
+(define_expand "smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (V2DFmode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
+})
+
+(define_insn "*smaxv2df3_finite"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && flag_finite_math_only
+ && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
+ "maxpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "maxpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmsmaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "maxsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_expand "sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (V2DFmode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
+})
+
+(define_insn "*sminv2df3_finite"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && flag_finite_math_only
+ && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
+ "minpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "minpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmsminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "minsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse3_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
+ "TARGET_SSE3"
+ "addsubpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_haddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "haddpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_hsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (minus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (minus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "hsubpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "reduc_splus_v2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")]
+ "TARGET_SSE3"
+{
+ emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+ DONE;
+})
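+
+;; Why one haddpd is enough here: with operand 1 holding {a, b},
+;; "haddpd x, x" produces {a+b, a+b}, so the scalar sum of both lanes
+;; ends up in every element and can be read from whichever lane the
+;; vectorizer extracts the reduction result from.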
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_maskcmpv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (match_operator:V2DF 3 "sse_comparison_operator"
+ [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE2"
+ "cmp%D3pd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmmaskcmpv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (match_operator:V2DF 3 "sse_comparison_operator"
+ [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cmp%D3sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_comi"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (vec_select:DF
+ (match_operand:V2DF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "comisd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_ucomi"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU
+ (vec_select:DF
+ (match_operand:V2DF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "ucomisd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "mode" "DF")])
+
+(define_expand "vcondv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (if_then_else:V2DF
+ (match_operator 3 ""
+ [(match_operand:V2DF 4 "nonimmediate_operand" "")
+ (match_operand:V2DF 5 "nonimmediate_operand" "")])
+ (match_operand:V2DF 1 "general_operand" "")
+ (match_operand:V2DF 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (ix86_expand_fp_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "andv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
+
+(define_insn "*andv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
+ "andpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_nandv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "iorv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
+
+(define_insn "*iorv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
+ "orpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "xorv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
+
+(define_insn "*xorv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
+ "xorpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+;; Also define scalar versions. These are used for abs, neg, and
+;; conditional move. Using subregs into vector modes causes register
+;; allocation lossage. These patterns do not allow memory operands
+;; because the native instructions read the full 128 bits.
+
+(define_insn "*anddf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (and:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "andpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*nanddf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*iordf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (ior:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "orpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*xordf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (xor:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "xorpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_cvtpi2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
+ "TARGET_SSE2"
+ "cvtpi2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_cvtpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvttpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsi2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")])
+
+(define_insn "sse2_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")])
+
+(define_insn "sse2_cvtsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvttsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "cvttsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SI")
+ (set_attr "athlon_decode" "double,vector")])
+
+(define_insn "sse2_cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "double,vector")])
+
+(define_insn "sse2_cvtdq2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "cvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "cvtpd2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "cvttpd2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsd2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse2_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtss2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_expand "sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SF 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "cvtpd2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_cvtps2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "cvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_unpckhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpckhpd\t{%2, %0|%0, %2}
+ movlpd\t{%H1, %0|%0, %H1}
+ movhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_insn "*sse3_movddup"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movddup\t{%1, %0|%0, %1}
+ #"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && reload_completed"
+ [(const_int 0)]
+{
+ rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
+ emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+ emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+ DONE;
+})
+
+(define_insn "sse2_unpcklpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_expand "sse2_shufpd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)));
+ DONE;
+})
+
+(define_insn "sse2_shufpd_1"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "shufpd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhpd\t{%1, %0|%0, %1}
+ unpckhpd\t%0, %0
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov")
+ (set_attr "mode" "V1DF,V2DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = adjust_address (operands[1], DFmode, 8);
+})
+
+(define_insn "sse2_storelpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlpd\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V1DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (DFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (DFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "sse2_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhpd\t{%2, %0|%0, %2}
+ unpcklpd\t{%2, %0|%0, %2}
+ shufpd\t{$1, %1, %0|%0, %1, 1}
+ #"
+ [(set_attr "type" "ssemov,sselog,sselog,other")
+ (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+(define_insn "sse2_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
+ (vec_select:DF
+ /* APPLE LOCAL mainline */
+ (match_operand:V2DF 1 "nonimmediate_or_0_operand" " C,0,0,x,o,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movsd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhpd\t{%H1, %0|%0, %H1}
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "register_operand" "")
+ (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+;; Not sure these two are ever used, but it doesn't hurt to have
+;; them. -aoliva
+(define_insn "*vec_extractv2df_1_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_extractv2df_0_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhps\t{%H1, %0|%0, %H1}
+ movhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*vec_dupv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE3"
+ "movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_SSE2"
+ "unpcklpd\t%0, %0"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_concatv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1)))]
+ "TARGET_SSE3"
+ "movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_concatv2df"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
+ /* APPLE LOCAL mainline */
+ (match_operand:DF 2 "nonimmediate_or_0_operand" " Yt,m,C,x,m")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movsd\t{%1, %0|%0, %1}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
+
+(define_expand "vec_setv2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv2df"
+ [(match_operand:DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (minus:SSEMODEI
+ (match_dup 2)
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
+(define_expand "add<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
+
+(define_insn "*add<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "padd<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_ssadd<mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (ss_plus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
+ "padds<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_usadd<mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (us_plus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
+ "paddus<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+
+(define_insn "*sub<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (minus:SSEMODEI
+ (match_operand:SSEMODEI 1 "register_operand" "0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psub<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_sssub<mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (ss_minus:SSEMODE12
+ (match_operand:SSEMODE12 1 "register_operand" "0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubs<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_ussub<mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (us_minus:SSEMODE12
+ (match_operand:SSEMODE12 1 "register_operand" "0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubus<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "mulv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx t[12], op0;
+ int i;
+
+ for (i = 0; i < 12; ++i)
+ t[i] = gen_reg_rtx (V16QImode);
+
+ /* Unpack data such that we've got a source byte in each low byte of
+ each word. We don't care what goes into the high byte of each word.
+ Rather than trying to get zero in there, it is most convenient to let
+ it be a copy of the low byte. */
+ emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
+ emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
+ emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
+ emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
+
+ /* Multiply words. The end-of-line annotations here give a picture of what
+ the output of that instruction looks like. Dot means don't care; the
+ letters are the bytes of the result with A being the most significant. */
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
+ gen_lowpart (V8HImode, t[0]),
+ gen_lowpart (V8HImode, t[1])));
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
+ gen_lowpart (V8HImode, t[2]),
+ gen_lowpart (V8HImode, t[3])));
+
+ /* Extract the relevant bytes and merge them back together. */
+ emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
+ emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
+ emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
+ emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
+ emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
+ emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
+
+ op0 = operands[0];
+ emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
+ DONE;
+})
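+
+;; A note on why keeping a copy of the byte in the high half of each word
+;; is correct: mulv16qi3 only has to return the low 8 bits of each
+;; product, and since 257 == 1 (mod 256),
+;;   (257*x) * (257*y) mod 256 == x*y mod 256,
+;; so the low byte of every pmullw result is already the wanted byte
+;; product.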
+
+(define_expand "mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_expand "smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+(define_insn "sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V4SImode);
+ t5 = gen_reg_rtx (V4SImode);
+ t6 = gen_reg_rtx (V4SImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply elements 2 and 0. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
+
+ /* Shift both input vectors down one element, so that elements 3 and 1
+ are now in the slots for elements 2 and 0. For K8, at least, this is
+ faster than using a shuffle. */
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, op1), thirtytwo));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, op2), thirtytwo));
+
+ /* Multiply elements 3 and 1. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
+
+ /* Move the results in element 2 down to element 1; we don't care what
+ goes in elements 2 and 3. */
+ emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+ emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+
+ /* Merge the parts back together. */
+ emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+ DONE;
+})
+
+(define_expand "mulv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V2DImode);
+ t2 = gen_reg_rtx (V2DImode);
+ t3 = gen_reg_rtx (V2DImode);
+ t4 = gen_reg_rtx (V2DImode);
+ t5 = gen_reg_rtx (V2DImode);
+ t6 = gen_reg_rtx (V2DImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply low parts. */
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
+ gen_lowpart (V4SImode, op2)));
+
+ /* Shift input vectors right 32 bits so we can multiply high parts. */
+ emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
+ emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
+
+ /* Multiply high parts by low parts. */
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
+ gen_lowpart (V4SImode, t3)));
+ emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
+ gen_lowpart (V4SImode, t2)));
+
+ /* Shift them back. */
+ emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
+ emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
+
+ /* Add the three parts together. */
+ emit_insn (gen_addv2di3 (t6, t1, t4));
+ emit_insn (gen_addv2di3 (op0, t6, t5));
+ DONE;
+})
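+
+;; The expansion above is the usual 64x64 -> low-64 decomposition: writing
+;; a = ah*2^32 + al and b = bh*2^32 + bl,
+;;   (a * b) mod 2^64 = al*bl + ((al*bh + ah*bl) << 32)
+;; because the ah*bh term overflows completely.  t1 holds al*bl, t4 and t5
+;; hold the two cross products (shifted back up by 32 bits), and the final
+;; adds combine the three pieces.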
+
+(define_expand "sdot_prodv8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")
+ (match_operand:V4SI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
+ emit_insn (gen_addv4si3 (operands[0], operands[3], t));
+ DONE;
+})
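+
+;; pmaddwd already computes a[2i]*b[2i] + a[2i+1]*b[2i+1] into each 32-bit
+;; lane, which is exactly the pairwise widening dot product wanted here,
+;; so the whole reduction step is one pmaddwd plus a vector add into the
+;; accumulator.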
+
+(define_expand "udot_prodv4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")
+ (match_operand:V2DI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t1, t2, t3, t4;
+
+ t1 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
+ emit_insn (gen_addv2di3 (t1, t1, operands[3]));
+
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (32)));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, operands[2]),
+ GEN_INT (32)));
+
+ t4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
+
+ emit_insn (gen_addv2di3 (operands[0], t1, t4));
+ DONE;
+})
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "*sse4_1_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmulld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "vec_widen_smult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (ashiftrt:SSEMODE24
+ (match_operand:SSEMODE24 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xn")))]
+ "TARGET_SSE2"
+ "psra<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (lshiftrt:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xn")))]
+ "TARGET_SSE2"
+ "psrl<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (ashift:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xn")))]
+ "TARGET_SSE2"
+ "psll<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+/* APPLE LOCAL begin 6440204 */
+/* Moved to i386.md. */
+/* APPLE LOCAL end 6440204 */
+
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (!const_0_to_255_mul_8_operand (operands[2], SImode))
+ FAIL;
+ operands[0] = gen_lowpart (TImode, operands[0]);
+ operands[1] = gen_lowpart (TImode, operands[1]);
+})
+
+;; APPLE LOCAL begin mainline 5951842
+;; moved sse2_lshrti3 to i386.md
+;; APPLE LOCAL end mainline 5951842
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (!const_0_to_255_mul_8_operand (operands[2], SImode))
+ FAIL;
+ operands[0] = gen_lowpart (TImode, operands[0]);
+ operands[1] = gen_lowpart (TImode, operands[1]);
+})
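+
+;; The vec_shl_<mode> and vec_shr_<mode> expanders above end up as
+;; pslldq/psrldq, which only take an immediate byte count, hence the check
+;; that the bit count is a constant multiple of 8 in the range 0..255 and
+;; the lowering of both operands to TImode.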
+
+(define_expand "umaxv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+ (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
+
+(define_insn "*umaxv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
+ "pmaxub\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "smaxv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
+
+(define_insn "*smaxv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
+ "pmaxsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "umaxv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (set (match_dup 3)
+ (plus:V8HI (match_dup 0) (match_dup 2)))]
+ "TARGET_SSE2"
+{
+ operands[3] = operands[0];
+ if (rtx_equal_p (operands[0], operands[2]))
+ operands[0] = gen_reg_rtx (V8HImode);
+})
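+
+;; There is no pmaxuw until SSE4.1, so unsigned V8HI max is synthesized
+;; from the saturating subtract: psubusw computes max(a - b, 0), and for
+;; unsigned values
+;;   max(a, b) = satsub(a, b) + b.
+;; The temporary register guards the case where the destination overlaps
+;; operand 2, which the first set would otherwise clobber.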
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
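+
+;; SSE2's only integer min/max instructions are pmaxsw/pminsw and
+;; pmaxub/pminub, so signed max for V16QI and V4SI goes through the vcond
+;; machinery instead, roughly
+;;   mask = pcmpgt(op1, op2)
+;;   dest = (mask & op1) | (~mask & op2)
+;; with ix86_expand_int_vcond emitting the compare-and-blend.  The
+;; unsigned expanders nearby do the same for the missing sizes, presumably
+;; after biasing the operands so a signed compare yields the unsigned
+;; ordering, unless SSE4.1 supplies the instruction directly.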
+
+(define_expand "umaxv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+})
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "sse4_1_smax<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
+ (smax:SSEMODE14
+ (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
+ "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_umax<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (umax:SSEMODE24
+ (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
+ "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_expand "uminv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+ (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
+
+(define_insn "*uminv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
+ "pminub\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "sminv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
+
+(define_insn "*sminv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
+ "pminsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "")
+ (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
+ (match_operand:SSEMODE24 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_eq<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (eq:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "sse4_1_eqv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (eq:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
+ "pcmpeqq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_insn "sse2_gt<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (gt:SSEMODE124
+ (match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "sse4_2_gtv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (gt:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_2"
+ "pcmpgtq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "")
+ (if_then_else:SSEMODE124
+ (match_operator 3 ""
+ [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODE124 1 "general_operand" "")
+ (match_operand:SSEMODE124 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (ix86_expand_int_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "")
+ (if_then_else:SSEMODE124
+ (match_operator 3 ""
+ [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODE124 1 "general_operand" "")
+ (match_operand:SSEMODE124 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (ix86_expand_int_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_dup 2)))]
+ "TARGET_SSE2"
+{
+ int i, n = GET_MODE_NUNITS (<MODE>mode);
+ rtvec v = rtvec_alloc (n);
+
+ for (i = 0; i < n; ++i)
+ RTVEC_ELT (v, i) = constm1_rtx;
+
+ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
+})
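+
+;; One's complement has no direct SSE2 instruction, so it is expanded as
+;; an xor with an all-ones vector; the CONST_VECTOR of constm1 elements
+;; built above is typically materialized with pcmpeqd %reg, %reg.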
+
+(define_expand "and<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
+
+(define_insn "*and<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+ "pand\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_nand<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_expand "ior<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
+
+(define_insn "*ior<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (ior:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
+ "por\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_expand "xor<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
+
+(define_insn "*xor<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (xor:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+ "pxor\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "sse4_1_smin<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
+ (smin:SSEMODE14
+ (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
+ "pmins<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_umin<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (umin:SSEMODE24
+ (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
+ "pminu<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
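The two SSE4.1 patterns above supply the signed byte/dword and unsigned word/dword element minima that plain SSE2 lacks. A hedged sketch of the corresponding intrinsics (names as in smmintrin.h; the wrappers are invented):

#include <smmintrin.h>   /* SSE4.1; build with -msse4.1 */

/* Per-element minima that only exist with SSE4.1.  */
static __m128i
min_s32 (__m128i a, __m128i b) { return _mm_min_epi32 (a, b); }   /* pminsd */

static __m128i
min_u16 (__m128i a, __m128i b) { return _mm_min_epu16 (a, b); }   /* pminuw */
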
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_expand "vec_interleave_highv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "0"))
+ (ss_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (ss_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "0"))
+ (us_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
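The three pack patterns above narrow word (or dword) elements with saturation; a rough intrinsic-level illustration of the ss_truncate/us_truncate semantics:

#include <emmintrin.h>

/* Narrow two V8HI vectors into one V16QI: packsswb clamps each word to
   -128..127, packuswb clamps it to 0..255.  */
static __m128i
pack_signed (__m128i lo, __m128i hi)   { return _mm_packs_epi16 (lo, hi); }

static __m128i
pack_unsigned (__m128i lo, __m128i hi) { return _mm_packus_epi16 (lo, hi); }
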
+(define_insn "sse2_punpckhbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_SSE2"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_SSE2"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_SSE2"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_SSE2"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhdq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE2"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckldq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE2"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "punpckhqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "punpcklqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
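The punpck patterns above are plain byte/word/dword/qword interleaves. One common use, sketched with intrinsics, is zero-extension by interleaving with a zero vector:

#include <emmintrin.h>

/* punpcklbw against zero zero-extends the low eight bytes of v into
   eight 16-bit lanes.  */
static __m128i
zext_lo_u8_to_u16 (__m128i v)
{
  return _mm_unpacklo_epi8 (v, _mm_setzero_si128 ());
}
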
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "*sse4_1_pinsrb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (match_operand:QI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_expand "sse2_pinsrw"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (vec_merge:V8HI
+ (vec_duplicate:V8HI
+ (match_operand:SI 2 "nonimmediate_operand" ""))
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:SI 3 "const_0_to_7_operand" "")))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_lowpart (HImode, operands[2]);
+ operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
+})
+
+(define_insn "*sse2_pinsrw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (vec_duplicate:V8HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;; It must come before sse2_loadld since it is preferred.
+(define_insn "*sse4_1_pinsrd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
+ "TARGET_SSE4_1"
+ "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb_memory"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_insn "sse2_pextrw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
+ "TARGET_SSE2"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "*sse4_1_pextrw_memory"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "pextrd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+;; It must come before *vec_extractv2di_1_sse since it is preferred.
+(define_insn "*sse4_1_pextrq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+ "TARGET_SSE4_1 && TARGET_64BIT"
+ "pextrq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
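In the pinsr/pextr patterns above the RTL immediate is a one-hot vec_merge mask, so the templates print exact_log2 of it to recover the lane number the instruction encodes. At the intrinsic level the lane number is passed directly; a small sketch with invented wrapper names:

#include <smmintrin.h>   /* -msse4.1 for the dword form; the word form is SSE2 */

static int
get_lane2_s32 (__m128i v)        { return _mm_extract_epi32 (v, 2); }   /* pextrd */

static __m128i
set_lane5_s16 (__m128i v, int x) { return _mm_insert_epi16 (v, x, 5); } /* pinsrw */
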
+(define_expand "sse2_pshufd"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "sse2_pshufd_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "pshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshuflw"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "sse2_pshuflw_1"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshufhw"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_insn "sse2_pshufhw_1"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (match_operand 2 "const_4_to_7_operand" "")
+ (match_operand 3 "const_4_to_7_operand" "")
+ (match_operand 4 "const_4_to_7_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= (INTVAL (operands[2]) - 4) << 0;
+ mask |= (INTVAL (operands[3]) - 4) << 2;
+ mask |= (INTVAL (operands[4]) - 4) << 4;
+ mask |= (INTVAL (operands[5]) - 4) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
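The pshufd/pshuflw/pshufhw expanders above split the 8-bit immediate into four 2-bit lane selectors ((mask >> 2*i) & 3), and the insn templates reassemble it. The _MM_SHUFFLE macro builds the same immediate from the other direction; for example, reversing a V4SI vector:

#include <emmintrin.h>

/* pshufd with immediate 0x1b (= _MM_SHUFFLE (0, 1, 2, 3)) reverses the
   four dwords.  */
static __m128i
reverse_epi32 (__m128i v)
{
  return _mm_shuffle_epi32 (v, _MM_SHUFFLE (0, 1, 2, 3));
}
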
+(define_expand "sse2_loadd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (match_dup 2)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "operands[2] = CONST0_RTX (V4SImode);")
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn "sse2_loadld"
+ [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movd\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI,TI,V4SF,SF")])
+;; APPLE LOCAL end 5612787 mainline sse4
+
+;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
+;; be taken into account, and movdi isn't fully populated even without.
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_insn_and_split "sse2_stored"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x,Yi")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "#"
+ "&& reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
+})
+;; APPLE LOCAL end 5612787 mainline sse4
+
+(define_expand "sse_storeq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "")
+
+;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
+;; be taken into account, and movdi isn't fully populated even without.
+(define_insn "*sse2_storeq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
+})
+
+(define_insn "*vec_extractv2di_1_sse2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ psrldq\t{$8, %0|%0, 8}
+ movq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov")
+ (set_attr "memory" "*,none,*")
+ (set_attr "mode" "V2SF,TI,TI")])
+
+;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
+(define_insn "*vec_extractv2di_1_sse"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_dupv4si"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "register_operand" " Yt,0")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE"
+ "@
+ pshufd\t{$0, %1, %0|%0, %1, 0}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI,V4SF")])
+
+(define_insn "*vec_dupv2di"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "register_operand" " 0 ,0")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t%0, %0
+ movlhps\t%0, %0"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "TI,V4SF")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*sse2_concatv2si"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
+ (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE2"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "TI,TI,DI,DI")])
+
+(define_insn "*sse1_concatv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
+ (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,V4SF,DI,DI")])
+
+(define_insn "*vec_concatv4si_1"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
+ (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "TI,V4SF,V2SF")])
+
+(define_insn "*vec_concatv2di"
+;; APPLE LOCAL begin 5612787 mainline sse4
+ [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
+ /* APPLE LOCAL mainline */
+ (match_operand:DI 2 "nonimmediate_or_0_operand" " C, C,Yt,x,m,0")))]
+;; APPLE LOCAL end 5612787 mainline sse4
+ "TARGET_SSE"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
+ (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
+
+(define_expand "vec_setv2di"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv2di"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2di"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4si"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv4si"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv4si"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv8hi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv8hi"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv8hi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv16qi"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv16qi"
+ [(match_operand:QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv16qi"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+(define_expand "vec_unpacku_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+;; APPLE LOCAL end 5612787 mainline sse4
+
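The vec_unpack expanders above choose between an SSE4.1 helper (single pmovsx/pmovzx instructions) and an SSE2 helper (punpck-based emulation). Only as a rough sketch of the two strategies, not of the helper routines themselves, sign-extending the low bytes might look like this:

#include <smmintrin.h>   /* -msse4.1 for the first variant */

/* Widen the low 8 signed bytes of v to 8 signed words.  */
static __m128i
sext_lo_s8_sse41 (__m128i v)
{
  return _mm_cvtepi8_epi16 (v);                         /* pmovsxbw */
}

static __m128i
sext_lo_s8_sse2 (__m128i v)
{
  /* Interleave v with itself, then arithmetic-shift each word by 8.  */
  return _mm_srai_epi16 (_mm_unpacklo_epi8 (v, v), 8);  /* punpcklbw + psraw */
}
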
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+ "pavgb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+;; The correct representation for this is absolutely enormous, and
+;; surely not generally useful.
+(define_insn "sse2_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE2"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
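pavgb/pavgw round upward, which is why the RTL above adds a constant 1 before the shift; psadbw stays an unspec because, as the comment says, the exact representation would be enormous. A small illustrative use of both, with invented helper names:

#include <emmintrin.h>

static __m128i
average_u8 (__m128i a, __m128i b)
{
  return _mm_avg_epu8 (a, b);                       /* (a + b + 1) >> 1 per byte */
}

/* Sum of absolute byte differences over all 16 bytes: psadbw leaves one
   partial sum per 64-bit half.  */
static unsigned int
sad16 (__m128i a, __m128i b)
{
  __m128i s = _mm_sad_epu8 (a, b);
  return _mm_cvtsi128_si32 (s)                          /* SAD of the low 8 bytes */
         + _mm_cvtsi128_si32 (_mm_srli_si128 (s, 8));   /* SAD of the high 8 bytes */
}
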
+(define_insn "sse_movmskps"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE"
+ "movmskps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_movmskpd"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "movmskpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "pmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
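movmskps/movmskpd/pmovmskb all collect element sign bits into a scalar register; the byte form is the workhorse of strlen/memchr-style scans. A minimal example:

#include <emmintrin.h>

/* Nonzero iff some byte of v is zero: compare for equality with zero,
   then collapse the 16 sign bits with pmovmskb.  */
static int
has_zero_byte (__m128i v)
{
  return _mm_movemask_epi8 (_mm_cmpeq_epi8 (v, _mm_setzero_si128 ()));
}
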
+(define_expand "sse2_maskmovdqu"
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2"
+ "")
+
+(define_insn "*sse2_maskmovdqu"
+ [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (mem:V16QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2 && !TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_maskmovdqu_rex64"
+ [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (mem:V16QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse_ldmxcsr"
+ [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
+ UNSPECV_LDMXCSR)]
+ "TARGET_SSE"
+ "ldmxcsr\t%0"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "load")])
+
+(define_insn "sse_stmxcsr"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
+ "TARGET_SSE"
+ "stmxcsr\t%0"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "store")])
+
+(define_expand "sse_sfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse_sfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "sfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_insn "sse2_clflush"
+ [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+ UNSPECV_CLFLUSH)]
+ "TARGET_SSE2"
+ "clflush\t%a0"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_mfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_mfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_SSE2"
+ "mfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_lfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_lfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+ "lfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
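The fence expanders above wrap the instruction in a volatile BLK memory reference so the scheduler treats it as touching all of memory. The usual pairing in user code is a non-temporal store loop followed by sfence; a sketch:

#include <emmintrin.h>

/* Fill dst with n copies of v using streaming stores, then order them
   with sfence before another agent reads the buffer.  */
static void
stream_fill (__m128i *dst, __m128i v, int n)
{
  int i;
  for (i = 0; i < n; i++)
    _mm_stream_si128 (&dst[i], v);   /* movntdq */
  _mm_sfence ();                     /* sse_sfence */
}
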
+(define_insn "sse3_mwait"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")]
+ UNSPECV_MWAIT)]
+ "TARGET_SSE3"
+;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
+;; Since 32bit register operands are implicitly zero extended to 64bit,
+;; we only need to set up 32bit registers.
+ "mwait"
+ [(set_attr "length" "3")])
+
+(define_insn "sse3_monitor"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && !TARGET_64BIT"
+ "monitor\t%0, %1, %2"
+ [(set_attr "length" "3")])
+; APPLE LOCAL begin mainline
+;; SSSE3
+(define_insn "ssse3_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phadddv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSSE3"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubdv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSSE3"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
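The SSSE3 horizontal add/subtract patterns above spell out every pairwise sum explicitly in RTL; at the intrinsic level the common idiom is a short reduction, for instance:

#include <tmmintrin.h>   /* SSSE3; build with -mssse3 */

/* Horizontal sum of a V4SI using phaddd twice.  */
static int
hsum_epi32 (__m128i v)
{
  v = _mm_hadd_epi32 (v, v);      /* a0+a1, a2+a3, a0+a1, a2+a3 */
  v = _mm_hadd_epi32 (v, v);      /* total in every lane */
  return _mm_cvtsi128_si32 (v);
}
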
+(define_insn "ssse3_pmaddubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V4QI
+ ;; APPLE LOCAL 6025404
+ (match_operand:V16QI 1 "nonimmediate_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V16QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V16QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_SSSE3"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pmaddubswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ss_plus:V4HI
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V4QI
+ ;; APPLE LOCAL 6025404
+ (match_operand:V8QI 1 "nonimmediate_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSSE3"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pmulhrswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 14))
+ (const_vector:V4HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "DI")])
+
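pmaddubsw and pmulhrsw are the fixed-point workhorses of this group: the former multiplies unsigned bytes by signed bytes and saturating-adds adjacent products, the latter is a Q15 multiply with rounding (the extra +1 and shifts in the RTL above). Illustrative wrappers only:

#include <tmmintrin.h>

static __m128i
dot_step_u8s8 (__m128i pixels_u8, __m128i weights_s8)
{
  return _mm_maddubs_epi16 (pixels_u8, weights_s8);   /* pmaddubsw */
}

static __m128i
q15_mul (__m128i a, __m128i b)
{
  return _mm_mulhrs_epi16 (a, b);   /* pmulhrsw: (a*b + 0x4000) >> 15 */
}
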
+(define_insn "ssse3_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_SSSE3"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSHUFB))]
+ "TARGET_SSSE3"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DI")])
+
+(define_insn "ssse3_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_SSSE3"
+ "psign<ssevecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "ssse3_psign<mode>3"
+ [(set (match_operand:MMXMODE124 0 "register_operand" "=y")
+ (unspec:MMXMODE124 [(match_operand:MMXMODE124 1 "register_operand" "0")
+ (match_operand:MMXMODE124 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSIGN))]
+ "TARGET_SSSE3"
+ "psign<mmxvecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
+(define_insn "ssse3_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_SSSE3"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "ssse3_palignrv1di"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (unspec:V1DI [(match_operand:V1DI 1 "register_operand" "0")
+ (match_operand:V1DI 2 "nonimmediate_operand" "ym")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_SSSE3"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+
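palignr's operand 3 above is a bit count that the templates divide by 8, matching the way _mm_alignr_epi8 scales its byte count up when invoking the builtin. One common use is taking a byte-shifted 16-byte window across two adjacent vectors; a sketch with an invented helper name:

#include <tmmintrin.h>

/* Bytes 4..19 of the 32-byte concatenation of lo and hi (lo in the low
   half), i.e. a 16-byte window shifted by four bytes.  */
static __m128i
window_plus_4 (__m128i hi, __m128i lo)
{
  return _mm_alignr_epi8 (hi, lo, 4);   /* palignr $4 */
}
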
+(define_insn "ssse3_pabs<mode>2"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSSE3"
+ "pabs<ssevecsize>\t{%1, %0|%0, %1}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+;; APPLE LOCAL begin 4656532 use V1DImode for _m64
+(define_insn "ssse3_pabs<mode>2"
+ [(set (match_operand:MMXMODE124 0 "register_operand" "=y")
+ (abs:MMXMODE124 (match_operand:MMXMODE124 1 "nonimmediate_operand" "ym")))]
+ "TARGET_SSSE3"
+ "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DI")])
+;; APPLE LOCAL end 4656532 use V1DImode for _m64
+; APPLE LOCAL end mainline
+
+(define_insn "sse3_monitor64"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && TARGET_64BIT"
+;; The 64-bit form is "monitor %rax,%rcx,%rdx", but only the lower 32 bits
+;; of RCX and RDX are used.  Since 32-bit register operands are implicitly
+;; zero-extended to 64 bits, we only need to set up the 32-bit registers.
+ "monitor"
+ [(set_attr "length" "3")])
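+;; From C, monitor/mwait are reached through the SSE3 intrinsics in
+;; pmmintrin.h.  A hedged usage sketch (the extension/hint arguments of
+;; zero are just illustrative):
+;;
+;;   #include <pmmintrin.h>
+;;   void wait_for_write (void const *addr)
+;;   {
+;;     _mm_monitor (addr, 0, 0);   /* arm the monitor on addr's cache line */
+;;     _mm_mwait (0, 0);           /* wait until that line is written */
+;;   }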
+
+;; APPLE LOCAL begin 5612787 mainline sse4
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4a_vmmovntv2df"
+ [(set (match_operand:DF 0 "memory_operand" "=m")
+ (unspec:DF [(vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse4a_movntdf"
+ [(set (match_operand:DF 0 "memory_operand" "=m")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse4a_vmmovntv4sf"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (unspec:SF [(vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse4a_movntsf"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
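+;; These scalar non-temporal stores back the SSE4A intrinsics declared in
+;; ammintrin.h.  A minimal sketch (store one element, bypassing the cache):
+;;
+;;   #include <ammintrin.h>
+;;   void stream_out (double *dp, __m128d dv, float *fp, __m128 fv)
+;;   {
+;;     _mm_stream_sd (dp, dv);   /* movntsd: low double of dv */
+;;     _mm_stream_ss (fp, fv);   /* movntss: low float of fv */
+;;   }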
+
+(define_insn "sse4a_extrqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_EXTRQI))]
+ "TARGET_SSE4A"
+ "extrq\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_EXTRQ))]
+ "TARGET_SSE4A"
+ "extrq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" "")]
+ UNSPEC_INSERTQI))]
+ "TARGET_SSE4A"
+ "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_INSERTQ))]
+ "TARGET_SSE4A"
+ "insertq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.1 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4_1_blendpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_3_operand" "n")))]
+ "TARGET_SSE4_1"
+ "blendpd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_blendps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")))]
+ "TARGET_SSE4_1"
+ "blendps\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_blendvpd"
+ [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
+ (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:V2DF 3 "register_operand" "Y0")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "blendvpd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_blendvps"
+ [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
+ (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:V4SF 3 "register_operand" "Y0")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "blendvps\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
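+;; These are the SSE4.1 blend intrinsics from smmintrin.h.  The vec_merge
+;; immediate (or, for blendv*, the sign bits of the implicit xmm0 operand)
+;; selects operand 2 where a bit is set and operand 1 elsewhere; roughly:
+;;
+;;   #include <smmintrin.h>
+;;   __m128 pick (__m128 a, __m128 b, __m128 mask)
+;;   {
+;;     __m128 c = _mm_blend_ps (a, b, 0x5);   /* lanes 0,2 from b; 1,3 from a */
+;;     return _mm_blendv_ps (c, b, mask);     /* per-lane select by sign bit */
+;;   }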
+
+(define_insn "sse4_1_dppd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_SSE4_1"
+ "dppd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_dpps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_SSE4_1"
+ "dpps\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
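+;; The dot-product patterns back _mm_dp_pd/_mm_dp_ps in smmintrin.h; the
+;; immediate's high nibble masks the inputs, its low nibble masks where the
+;; sum is written.  For example:
+;;
+;;   #include <smmintrin.h>
+;;   float dot4 (__m128 a, __m128 b)
+;;   {
+;;     /* 0xF1: multiply all four lanes, place the sum in lane 0 only.  */
+;;     return _mm_cvtss_f32 (_mm_dp_ps (a, b, 0xF1));
+;;   }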
+
+(define_insn "sse4_1_movntdqa"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
+ UNSPEC_MOVNTDQA))]
+ "TARGET_SSE4_1"
+ "movntdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_SSE4_1"
+ "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE4_1"
+ "packusdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendvb"
+ [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
+ (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:V16QI 3 "register_operand" "Y0")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "pblendvb\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_SSE4_1"
+ "pblendw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_phminposuw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PHMINPOSUW))]
+ "TARGET_SSE4_1"
+ "phminposuw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (vec_duplicate:V16QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (vec_duplicate:V16QI
+ (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (vec_duplicate:V16QI
+ (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (vec_duplicate:V8HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (vec_duplicate:V8HI
+ (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (vec_duplicate:V4SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (vec_duplicate:V16QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (vec_duplicate:V16QI
+ (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (vec_duplicate:V16QI
+ (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (vec_duplicate:V8HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (vec_duplicate:V8HI
+ (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (vec_duplicate:V4SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
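+;; The pmovsx/pmovzx patterns above are the widening conversions behind the
+;; SSE4.1 _mm_cvtepi*_epi*/_mm_cvtepu*_epi* intrinsics in smmintrin.h, e.g.:
+;;
+;;   #include <smmintrin.h>
+;;   __m128i widen (__m128i bytes, __m128i halves)
+;;   {
+;;     __m128i s = _mm_cvtepi8_epi16 (bytes);    /* pmovsxbw: 8 x s8 -> 8 x s16 */
+;;     __m128i u = _mm_cvtepu16_epi32 (halves);  /* pmovzxwd: 4 x u16 -> 4 x s32 */
+;;     return _mm_add_epi32 (u, _mm_cvtepi16_epi32 (s));
+;;   }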
+
+;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
+;; But it is not really a compare instruction.
+(define_insn "sse4_1_ptest"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1"
+ "ptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
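+;; ptest only sets flags; the smmintrin.h wrappers fold the flag results
+;; back into integers.  The common "is this vector all zeroes?" idiom:
+;;
+;;   #include <smmintrin.h>
+;;   int all_zero (__m128i v)
+;;   {
+;;     return _mm_testz_si128 (v, v);   /* ZF is set iff (v & v) == 0 */
+;;   }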
+
+(define_insn "sse4_1_roundpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_SSE4_1"
+ "roundpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_roundps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_SSE4_1"
+ "roundps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_roundsd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:V2DF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE4_1"
+ "roundsd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_roundss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE4_1"
+ "roundss\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
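+;; The round* patterns serve the _mm_round_*/_mm_floor_*/_mm_ceil_*
+;; intrinsics in smmintrin.h; the immediate selects the rounding mode and
+;; exception behaviour.  For instance:
+;;
+;;   #include <smmintrin.h>
+;;   __m128d round_nearest (__m128d x)
+;;   {
+;;     return _mm_round_pd (x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+;;   }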
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.2 string/text processing instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_and_split "sse4_2_pcmpestr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "register_operand" "x,x")
+ (match_operand:SI 3 "register_operand" "a,a")
+ (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
+ (match_operand:SI 5 "register_operand" "d,d")
+ (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
+ operands[4], operands[5],
+ operands[6]));
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
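+;; The split above emits pcmpestri, pcmpestrm, or the flags-only variant
+;; depending on which of ECX, XMM0 and the flags are actually live.  From C
+;; the string-compare family comes in via smmintrin.h, for example:
+;;
+;;   #include <smmintrin.h>
+;;   int find_any (__m128i set, int set_len, __m128i text, int text_len)
+;;   {
+;;     /* Index of the first byte of `text' that matches any byte of `set'.  */
+;;     return _mm_cmpestri (set, set_len, text, text_len,
+;;                          _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
+;;   }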
+
+(define_insn "sse4_2_pcmpestri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
+ (match_operand:SI 1 "register_operand" "a,a,a,a")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 3 "register_operand" "d,d,d,d")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPESTR))
+ (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
+ (clobber (match_scratch:SI 6 "= X, X,c,c"))]
+ "TARGET_SSE4_2"
+ "@
+ pcmpestrm\t{%4, %2, %0|%0, %2, %4}
+ pcmpestrm\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "sse4_2_pcmpistr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "register_operand" "x,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
+ operands[3], operands[4]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
+ operands[3], operands[4]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
+ operands[4]));
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPISTR))
+ (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
+ (clobber (match_scratch:SI 4 "= X, X,c,c"))]
+ "TARGET_SSE4_2"
+ "@
+ pcmpistrm\t{%2, %1, %0|%0, %1, %2}
+ pcmpistrm\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "mode" "TI")])
+;; APPLE LOCAL end 5612787 mainline sse4
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/sync.md b/gcc-4.2.1-5666.3/gcc/config/i386/sync.md
new file mode 100644
index 000000000..8c2fdb230
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/sync.md
@@ -0,0 +1,291 @@
+;; GCC machine description for i386 synchronization instructions.
+;; Copyright (C) 2005, 2006
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+(define_mode_macro IMODE [QI HI SI (DI "TARGET_64BIT")])
+(define_mode_attr modesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+(define_mode_attr modeconstraint [(QI "q") (HI "r") (SI "r") (DI "r")])
+(define_mode_attr immconstraint [(QI "i") (HI "i") (SI "i") (DI "e")])
+
+(define_mode_macro CASMODE [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
+ (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_macro DCASMODE
+ [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
+ (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
+(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
+
+;; ??? It would be possible to use cmpxchg8b on pentium for DImode
+;; changes. It's complicated because the insn uses ecx:ebx as the
+;; new value; note that the registers are reversed from the order
+;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode
+;; data in 64-bit mode.
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:CASMODE 0 "register_operand" "")
+ (match_operand:CASMODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:CASMODE
+ [(match_dup 1)
+ (match_operand:CASMODE 2 "register_operand" "")
+ (match_operand:CASMODE 3 "register_operand" "")]
+ UNSPECV_CMPXCHG_1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_CMPXCHG"
+{
+ if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+ {
+ enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
+ rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
+ rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
+ GET_MODE_SIZE (hmode));
+ low = force_reg (hmode, low);
+ high = force_reg (hmode, high);
+ if (<MODE>mode == DImode)
+ emit_insn (gen_sync_double_compare_and_swapdi
+ (operands[0], operands[1], operands[2], low, high));
+ else if (<MODE>mode == TImode)
+ emit_insn (gen_sync_double_compare_and_swapti
+ (operands[0], operands[1], operands[2], low, high));
+ else
+ gcc_unreachable ();
+ DONE;
+ }
+})
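+;; This expander is what GCC's __sync_*_compare_and_swap builtins map to on
+;; x86, ending up as lock cmpxchg / cmpxchg8b.  A small C-level sketch:
+;;
+;;   int cas (int *p, int expected, int desired)
+;;   {
+;;     /* Returns the old value of *p; it equals `expected' on success.  */
+;;     return __sync_val_compare_and_swap (p, expected, desired);
+;;   }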
+
+(define_insn "*sync_compare_and_swap<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=a")
+ (match_operand:IMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:IMODE
+ [(match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "a")
+ (match_operand:IMODE 3 "register_operand" "<modeconstraint>")]
+ UNSPECV_CMPXCHG_1))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_CMPXCHG"
+ "lock\;cmpxchg{<modesuffix>}\t{%3, %1|%1, %3}")
+
+(define_insn "sync_double_compare_and_swap<mode>"
+ [(set (match_operand:DCASMODE 0 "register_operand" "=A")
+ (match_operand:DCASMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DCASMODE
+ [(match_dup 1)
+ (match_operand:DCASMODE 2 "register_operand" "A")
+ (match_operand:<DCASHMODE> 3 "register_operand" "b")
+ (match_operand:<DCASHMODE> 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG_1))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock\;cmpxchg<doublemodesuffix>b\t%1")
+
+;; Theoretically we'd like to use constraint "r" (any reg) for operand
+;; 3, but that includes ecx.  If operands 3 and 4 are the same (e.g. when
+;; the input is -1LL) GCC might choose to allocate operand 3 to ecx, just
+;; like operand 4.  This breaks, as the xchg will then move the PIC register
+;; contents to %ecx --> boom.  Operands 3 and 4 really need to be different
+;; registers, which in this case means operand 3 must not be ecx.
+;; Instead of playing tricks with fake early clobbers or the like we
+;; just enumerate all regs possible here, which (as this is !TARGET_64BIT)
+;; are just esi and edi.
+(define_insn "*sync_double_compare_and_swapdi_pic"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (match_operand:DI 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DI
+ [(match_dup 1)
+ (match_operand:DI 2 "register_operand" "A")
+ (match_operand:SI 3 "register_operand" "SD")
+ (match_operand:SI 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG_1))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
+ "xchg{l}\t%%ebx, %3\;lock\;cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+
+(define_expand "sync_compare_and_swap_cc<mode>"
+ [(parallel
+ [(set (match_operand:CASMODE 0 "register_operand" "")
+ (match_operand:CASMODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:CASMODE
+ [(match_dup 1)
+ (match_operand:CASMODE 2 "register_operand" "")
+ (match_operand:CASMODE 3 "register_operand" "")]
+ UNSPECV_CMPXCHG_1))
+ (set (match_dup 4)
+ (compare:CCZ
+ (unspec_volatile:CASMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG_2)
+ (match_dup 2)))])]
+ "TARGET_CMPXCHG"
+{
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ ix86_compare_op0 = operands[3];
+ ix86_compare_op1 = NULL;
+ ix86_compare_emitted = operands[4];
+ if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+ {
+ enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
+ rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
+ rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
+ GET_MODE_SIZE (hmode));
+ low = force_reg (hmode, low);
+ high = force_reg (hmode, high);
+ if (<MODE>mode == DImode)
+ emit_insn (gen_sync_double_compare_and_swap_ccdi
+ (operands[0], operands[1], operands[2], low, high));
+ else if (<MODE>mode == TImode)
+ emit_insn (gen_sync_double_compare_and_swap_ccti
+ (operands[0], operands[1], operands[2], low, high));
+ else
+ gcc_unreachable ();
+ DONE;
+ }
+})
+
+(define_insn "*sync_compare_and_swap_cc<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=a")
+ (match_operand:IMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:IMODE
+ [(match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "a")
+ (match_operand:IMODE 3 "register_operand" "<modeconstraint>")]
+ UNSPECV_CMPXCHG_1))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:IMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG_2)
+ (match_dup 2)))]
+ "TARGET_CMPXCHG"
+ "lock\;cmpxchg{<modesuffix>}\t{%3, %1|%1, %3}")
+
+(define_insn "sync_double_compare_and_swap_cc<mode>"
+ [(set (match_operand:DCASMODE 0 "register_operand" "=A")
+ (match_operand:DCASMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DCASMODE
+ [(match_dup 1)
+ (match_operand:DCASMODE 2 "register_operand" "A")
+ (match_operand:<DCASHMODE> 3 "register_operand" "b")
+ (match_operand:<DCASHMODE> 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG_1))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:DCASMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
+ UNSPECV_CMPXCHG_2)
+ (match_dup 2)))]
+ ""
+ "lock\;cmpxchg<doublemodesuffix>b\t%1")
+
+;; See above for the explanation of using the constraint "SD" for
+;; operand 3.
+(define_insn "*sync_double_compare_and_swap_ccdi_pic"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (match_operand:DI 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DI
+ [(match_dup 1)
+ (match_operand:DI 2 "register_operand" "A")
+ (match_operand:SI 3 "register_operand" "SD")
+ (match_operand:SI 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG_1))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:DI
+ [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
+ UNSPECV_CMPXCHG_2)
+ (match_dup 2)))]
+ "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
+ "xchg{l}\t%%ebx, %3\;lock\;cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+
+(define_insn "sync_old_add<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=<modeconstraint>")
+ (unspec_volatile:IMODE
+ [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG))
+ (set (match_dup 1)
+ (plus:IMODE (match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_XADD"
+ "lock\;xadd{<modesuffix>}\t{%0, %1|%1, %0}")
+
+;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
+(define_insn "sync_lock_test_and_set<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=<modeconstraint>")
+ (unspec_volatile:IMODE
+ [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG))
+ (set (match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "0"))]
+ ""
+ "xchg{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_add<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(plus:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock\;add{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_sub<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(minus:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock\;sub{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_ior<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(ior:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock\;or{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_and<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(and:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock\;and{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_xor<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(xor:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock\;xor{<modesuffix>}\t{%1, %0|%0, %1}")
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin
new file mode 100644
index 000000000..5e6df6912
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin
@@ -0,0 +1,22 @@
+SHLIB_VERPFX = $(srcdir)/config/i386/darwin-libgcc
+# APPLE LOCAL begin 4099000
+LIB1ASMSRC = i386/lib1funcs.asm
+LIB1ASMFUNCS = _get_pc_thunk_ax _get_pc_thunk_dx _get_pc_thunk_cx _get_pc_thunk_bx _get_pc_thunk_si _get_pc_thunk_di _get_pc_thunk_bp
+# APPLE LOCAL end 4099000
+# APPLE LOCAL avoid try fat on thin system
+ifneq ($(shell lipo -info /usr/lib/libSystem.B.dylib | grep x86_64),)
+MULTILIB_OPTIONS = m64
+MULTILIB_DIRNAMES = x86_64
+# APPLE LOCAL avoid try fat on thin system
+endif
+LIB2_SIDITI_CONV_FUNCS=yes
+LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c
+
+# APPLE LOCAL begin gcov 5573505
+# -pipe because there's an assembler bug, 4077127, which makes it
+# mishandle the first # directive; temporary file names then end up in
+# stabs and break the bootstrap.  Using -pipe works around this by not
+# having any temporary file names at all.
+TARGET_LIBGCC2_CFLAGS = -fPIC -pipe
+TARGET_LIBGCC2_STATIC_CFLAGS = -mmacosx-version-min=10.4
+# APPLE LOCAL end gcov 5573505
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64 b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64
new file mode 100644
index 000000000..3670a125b
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/t-darwin64
@@ -0,0 +1,12 @@
+SHLIB_VERPFX = $(srcdir)/config/i386/darwin-libgcc
+LIB2_SIDITI_CONV_FUNCS=yes
+LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c
+
+# APPLE LOCAL begin gcov 5573505
+# -pipe because there's an assembler bug, 4077127, which makes it
+# mishandle the first # directive; temporary file names then end up in
+# stabs and break the bootstrap.  Using -pipe works around this by not
+# having any temporary file names at all.
+TARGET_LIBGCC2_CFLAGS = -fPIC -pipe
+TARGET_LIBGCC2_STATIC_CFLAGS = -mmacosx-version-min=10.4
+# APPLE LOCAL end gcov 5573505
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc b/gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc
new file mode 100644
index 000000000..c37f8a759
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/t-gmm_malloc
@@ -0,0 +1,6 @@
+# Install gmm_malloc.h as mm_malloc.h.
+
+EXTRA_HEADERS += mm_malloc.h
+mm_malloc.h: $(srcdir)/config/i386/gmm_malloc.h
+ rm -f $@
+ cat $^ > $@
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h
new file mode 100644
index 000000000..1bb254bfe
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/tmmintrin.h
@@ -0,0 +1,304 @@
+/* APPLE LOCAL file ssse3 4424835 */
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.1. */
+
+#ifndef _TMMINTRIN_H_INCLUDED
+#define _TMMINTRIN_H_INCLUDED
+
+#ifdef __SSSE3__
+#include <pmmintrin.h>
+
+/* APPLE LOCAL begin nodebug inline */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadd_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
+}
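+/* For reference: the result is the pairwise sums of __X followed by the
+   pairwise sums of __Y, i.e. { x0+x1, x2+x3, x4+x5, x6+x7,
+   y0+y1, y2+y3, y4+y5, y6+y7 }.  */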
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadd_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadds_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadd_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadd_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hadds_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsub_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsub_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsub_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsub_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sign_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sign_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sign_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sign_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sign_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sign_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* APPLE LOCAL begin 5814283 */
+#define _mm_alignr_epi8(__X, __Y, __N) \
+ ((__m128i)__builtin_ia32_palignr128 ((__v2di)(__X), (__v2di)(__Y), (__N) * 8))
+/* APPLE LOCAL end 5814283 */
+
+#define _mm_alignr_pi8(__X, __Y, __N) \
+ ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
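+/* Note that the byte count __N is scaled up to a bit count here; the
+   palignr patterns in sse.md divide it back down before printing the
+   immediate.  A small usage sketch, extracting an unaligned 16-byte
+   window from two adjacent registers:
+
+     __m128i shifted_by_one (__m128i hi, __m128i lo)
+     {
+       return _mm_alignr_epi8 (hi, lo, 1);  // bytes 1..16 of {lo, hi}
+     }
+*/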
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_abs_epi8 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_abs_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128i __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_abs_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_abs_pi8 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_abs_pi16 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_abs_pi32 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
+}
+
+/* APPLE LOCAL begin nodebug inline */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline */
+
+#endif /* __SSSE3__ */
+
+#endif /* _TMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin
new file mode 100644
index 000000000..025c5f4fb
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin
@@ -0,0 +1,8 @@
+host-i386-darwin.o : $(srcdir)/config/i386/host-i386-darwin.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) \
+ config/host-darwin.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+# APPLE LOCAL begin use -mdynamic-no-pic to build x86-hosted compilers
+
+XCFLAGS = -mdynamic-no-pic
+# APPLE LOCAL end use -mdynamic-no-pic to build x86-hosted compilers
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64 b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64
new file mode 100644
index 000000000..58e9f6753
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/x-darwin-x86_64
@@ -0,0 +1,8 @@
+# APPLE LOCAL file mainline
+# This file should go away.
+host-x86_64-darwin.o : $(srcdir)/config/i386/host-x86_64-darwin.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) \
+ config/host-darwin.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+XCFLAGS =
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/x-i386 b/gcc-4.2.1-5666.3/gcc/config/i386/x-i386
new file mode 100644
index 000000000..e156bcde3
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/x-i386
@@ -0,0 +1,3 @@
+driver-i386.o : $(srcdir)/config/i386/driver-i386.c \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h b/gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h
new file mode 100644
index 000000000..ad805b866
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/xmmintrin.h
@@ -0,0 +1,1582 @@
+/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _XMMINTRIN_H_INCLUDED
+#define _XMMINTRIN_H_INCLUDED
+
+#ifndef __SSE__
+# error "SSE instruction set not enabled"
+#else
+
+/* We need type definitions from the MMX header file. */
+#include <mmintrin.h>
+
+/* Get _mm_malloc () and _mm_free (). */
+/* APPLE LOCAL begin xmmintrin.h for kernel 4123064 */
+#if __STDC_HOSTED__
+#include <mm_malloc.h>
+#endif
+/* APPLE LOCAL end xmmintrin.h for kernel 4123064 */
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Create a selector for use with the SHUFPS instruction. */
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
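+/* For example, _MM_SHUFFLE (3, 2, 1, 0) is 0xE4, the identity selector.
+   A short sketch of its use with _mm_shuffle_ps (defined further down):
+
+     __m128 swap_within_pairs (__m128 a)
+     {
+       return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1));
+     }
+
+   which yields { a1, a0, a3, a2 }.  */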
+
+/* Constants for use with _mm_prefetch. */
+enum _mm_hint
+{
+ _MM_HINT_T0 = 3,
+ _MM_HINT_T1 = 2,
+ _MM_HINT_T2 = 1,
+ _MM_HINT_NTA = 0
+};
+
+/* Bits in the MXCSR. */
+#define _MM_EXCEPT_MASK 0x003f
+#define _MM_EXCEPT_INVALID 0x0001
+#define _MM_EXCEPT_DENORM 0x0002
+#define _MM_EXCEPT_DIV_ZERO 0x0004
+#define _MM_EXCEPT_OVERFLOW 0x0008
+#define _MM_EXCEPT_UNDERFLOW 0x0010
+#define _MM_EXCEPT_INEXACT 0x0020
+
+#define _MM_MASK_MASK 0x1f80
+#define _MM_MASK_INVALID 0x0080
+#define _MM_MASK_DENORM 0x0100
+#define _MM_MASK_DIV_ZERO 0x0200
+#define _MM_MASK_OVERFLOW 0x0400
+#define _MM_MASK_UNDERFLOW 0x0800
+#define _MM_MASK_INEXACT 0x1000
+
+#define _MM_ROUND_MASK 0x6000
+#define _MM_ROUND_NEAREST 0x0000
+#define _MM_ROUND_DOWN 0x2000
+#define _MM_ROUND_UP 0x4000
+#define _MM_ROUND_TOWARD_ZERO 0x6000
+
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#define _MM_FLUSH_ZERO_ON 0x8000
+#define _MM_FLUSH_ZERO_OFF 0x0000
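+/* These bits are normally manipulated through the _MM_GET_ and _MM_SET_
+   helper macros defined later in this header.  A hedged sketch of
+   temporarily forcing truncation (num/den are illustrative __m128 values):
+
+     unsigned int saved = _MM_GET_ROUNDING_MODE ();
+     _MM_SET_ROUNDING_MODE (_MM_ROUND_TOWARD_ZERO);
+     __m128 q = _mm_div_ps (num, den);   // rounds toward zero
+     _MM_SET_ROUNDING_MODE (saved);      // restore the previous mode
+*/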
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#define __always_inline__ __always_inline__, __nodebug__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* APPLE LOCAL begin radar 5618945 */
+#undef __STATIC_INLINE
+#ifdef __GNUC_STDC_INLINE__
+#define __STATIC_INLINE __inline
+#else
+#define __STATIC_INLINE static __inline
+#endif
+/* APPLE LOCAL end radar 5618945 */
+
+/* Create a vector of zeros. */
+/* APPLE LOCAL begin radar 4152603 */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setzero_ps (void)
+{
+ return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
+/* Perform the respective operation on the lower SPFP (single-precision
+ floating-point) values of A and B; the upper three SPFP values are
+ passed through from A. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sqrt_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_rcp_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_rsqrt_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
+}
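+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): only element 0 is computed by the _ss forms, so the
+   result below is { A0*B0 + A0, A1, A2, A3 }.  */
+static __inline __m128
+__example_scale_and_bias_low (__m128 __A, __m128 __B)
+{
+  /* mul_ss leaves A1..A3 untouched; add_ss then adds the low elements.  */
+  return _mm_add_ss (_mm_mul_ss (__A, __B), __A);
+}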
+
+/* Perform the respective operation on the four SPFP values in A and B. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sqrt_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_rcp_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_rsqrt_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
+}
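+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): the packed forms operate on all four elements at once,
+   so an element-wise a*b + c needs just two intrinsics.  */
+static __inline __m128
+__example_madd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return _mm_add_ps (_mm_mul_ps (__A, __B), __C);
+}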
+
+/* Perform logical bit-wise operations on 128-bit values. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_andps (__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_andnps (__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_orps (__A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_xorps (__A, __B);
+}
+
+/* Perform a comparison on the lower SPFP values of A and B. If the
+ comparison is true, place a mask of all ones in the result, otherwise a
+ mask of zeros. The upper three SPFP values are passed through from A. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpltss ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpless ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpnltss ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpnless ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform a comparison on the four SPFP values of A and B. For each
+ element, if the comparison is true, place a mask of all ones in the
+ result, otherwise a mask of zeros. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpneq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpord_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
+}
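+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): because the packed comparisons return all-ones or
+   all-zeros masks, they combine with the logical operations above into a
+   branch-free element-wise select.  */
+static __inline __m128
+__example_min_ps_via_select (__m128 __A, __m128 __B)
+{
+  __m128 __mask = _mm_cmplt_ps (__A, __B);        /* ~0 where A < B   */
+  return _mm_or_ps (_mm_and_ps (__mask, __A),     /* keep A there     */
+                    _mm_andnot_ps (__mask, __B)); /* keep B elsewhere */
+}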
+
+/* Compare the lower SPFP values of A and B and return 1 if true
+ and 0 if false. */
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+ rounding mode. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtss_si32 (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvt_ss2si (__m128 __A)
+{
+ return _mm_cvtss_si32 (__A);
+}
+
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 64-bit integer according to the
+   current rounding mode. */
+
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtss_si64 (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
+/* Convert the two lower SPFP values to 32-bit integers according to the
+ current rounding mode. Return the integers in packed form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtps_pi32 (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvt_ps2pi (__m128 __A)
+{
+ return _mm_cvtps_pi32 (__A);
+}
+
+/* Truncate the lower SPFP value to a 32-bit integer. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttss_si32 (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtt_ss2si (__m128 __A)
+{
+ return _mm_cvttss_si32 (__A);
+}
+
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 64-bit integer. */
+
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttss_si64 (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE long long __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
+/* Truncate the two lower SPFP values to 32-bit integers. Return the
+ integers in packed form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvttps_pi32 (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtt_ps2pi (__m128 __A)
+{
+ return _mm_cvttps_pi32 (__A);
+}
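+
+/* Editorial note (not part of the original header): with the default
+   round-to-nearest-even mode, _mm_cvtss_si32 maps 2.5f to 2 and -1.7f to -2,
+   while the truncating _mm_cvttss_si32 maps 2.5f to 2 and -1.7f to -1; only
+   the truncating forms ignore the MXCSR rounding mode.  */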
+
+/* Convert B to a SPFP value and insert it as element zero in A. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+ return _mm_cvtsi32_ss (__A, __B);
+}
+
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+
+/* Intel intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
+/* Convert the two 32-bit values in B to SPFP form and insert them
+ as the two lower elements in A. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpi32_ps (__m128 __A, __m64 __B)
+{
+ return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+ return _mm_cvtpi32_ps (__A, __B);
+}
+
+/* Convert the four signed 16-bit values in A to SPFP form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpi16_ps (__m64 __A)
+{
+ __v4hi __sign;
+ __v2si __hisi, __losi;
+ __v4sf __r;
+
+ /* This comparison against zero gives us a mask that can be used to
+ fill in the missing sign bits in the unpack operations below, so
+ that we get signed values after unpacking. */
+ __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
+
+ /* Convert the four words to doublewords. */
+ __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
+
+ /* Convert the doublewords to floating point two at a time. */
+ __r = (__v4sf) _mm_setzero_ps ();
+ __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
+ __r = __builtin_ia32_movlhps (__r, __r);
+ __r = __builtin_ia32_cvtpi2ps (__r, __losi);
+
+ return (__m128) __r;
+}
+
+/* Convert the four unsigned 16-bit values in A to SPFP form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpu16_ps (__m64 __A)
+{
+ __v2si __hisi, __losi;
+ __v4sf __r;
+
+ /* Convert the four words to doublewords. */
+ __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
+
+ /* Convert the doublewords to floating point two at a time. */
+ __r = (__v4sf) _mm_setzero_ps ();
+ __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
+ __r = __builtin_ia32_movlhps (__r, __r);
+ __r = __builtin_ia32_cvtpi2ps (__r, __losi);
+
+ return (__m128) __r;
+}
+
+/* Convert the low four signed 8-bit values in A to SPFP form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpi8_ps (__m64 __A)
+{
+ __v8qi __sign;
+
+ /* This comparison against zero gives us a mask that can be used to
+ fill in the missing sign bits in the unpack operations below, so
+ that we get signed values after unpacking. */
+ __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
+
+ /* Convert the four low bytes to words. */
+ __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
+
+ return _mm_cvtpi16_ps(__A);
+}
+
+/* Convert the low four unsigned 8-bit values in A to SPFP form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpu8_ps(__m64 __A)
+{
+ __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
+ return _mm_cvtpu16_ps(__A);
+}
+
+/* Convert the four signed 32-bit values in A and B to SPFP form. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
+{
+ __v4sf __zero = (__v4sf) _mm_setzero_ps ();
+ __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
+ __v4sf __sfb = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__B);
+ return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
+}
+
+/* Convert the four SPFP values in A to four signed 16-bit integers. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtps_pi16(__m128 __A)
+{
+ __v4sf __hisf = (__v4sf)__A;
+ __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
+ __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
+ __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
+ return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
+}
+
+/* Convert the four SPFP values in A to four signed 8-bit integers. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtps_pi8(__m128 __A)
+{
+ __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
+ return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
+}
+
+/* Selects four specific SPFP values from A and B based on MASK. */
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_shuffle_ps (__m128 __A, __m128 __B, int __mask)
+{
+ return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
+}
+#else
+#define _mm_shuffle_ps(A, B, MASK) \
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK)))
+#endif
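+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): shuffling a register with itself broadcasts one lane,
+   here lane 0, into all four elements.  */
+static __inline __m128
+__example_broadcast_low (__m128 __A)
+{
+  return _mm_shuffle_ps (__A, __A, _MM_SHUFFLE (0, 0, 0, 0));
+}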
+
+
+/* Selects and interleaves the upper two SPFP values from A and B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Selects and interleaves the lower two SPFP values from A and B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Sets the upper two SPFP values with 64-bits of data loaded from P;
+ the lower two values are passed through from A. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+ return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
+}
+
+/* Stores the upper two SPFP values of A into P. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+ __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
+}
+
+/* Moves the upper two values of B into the lower two values of A. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Moves the lower two values of B into the upper two values of A. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Sets the lower two SPFP values with 64-bits of data loaded from P;
+ the upper two values are passed through from A. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+ return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
+}
+
+/* Stores the lower two SPFP values of A into P. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+ __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
+}
+
+/* Creates a 4-bit mask from the most significant bits of the SPFP values. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movemask_ps (__m128 __A)
+{
+ return __builtin_ia32_movmskps ((__v4sf)__A);
+}
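+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): combining a packed comparison with the 4-bit movemask
+   turns a vector predicate into an ordinary scalar test.  */
+static __inline int
+__example_any_less_ps (__m128 __A, __m128 __B)
+{
+  return _mm_movemask_ps (_mm_cmplt_ps (__A, __B)) != 0;
+}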
+
+/* Return the contents of the control register. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_getcsr (void)
+{
+ return __builtin_ia32_stmxcsr ();
+}
+
+/* Read exception bits from the control register. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_GET_EXCEPTION_STATE (void)
+{
+ return _mm_getcsr() & _MM_EXCEPT_MASK;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_GET_EXCEPTION_MASK (void)
+{
+ return _mm_getcsr() & _MM_MASK_MASK;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_GET_ROUNDING_MODE (void)
+{
+ return _mm_getcsr() & _MM_ROUND_MASK;
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE unsigned int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_GET_FLUSH_ZERO_MODE (void)
+{
+ return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
+}
+
+/* Set the control register to I. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setcsr (unsigned int __I)
+{
+ __builtin_ia32_ldmxcsr (__I);
+}
+
+/* Set exception bits in the control register. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_SET_EXCEPTION_STATE(unsigned int __mask)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_SET_EXCEPTION_MASK (unsigned int __mask)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_SET_ROUNDING_MODE (unsigned int __mode)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
+}
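+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): each setter is a read-modify-write of the MXCSR, so the
+   two calls below leave every other control and status bit unchanged.  */
+static __inline void
+__example_configure_mxcsr (void)
+{
+  _MM_SET_ROUNDING_MODE (_MM_ROUND_TOWARD_ZERO);
+  _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
+}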
+
+/* Create a vector with element 0 as F and the rest zero. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_ss (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, 0, 0, 0 };
+}
+
+/* Create a vector with all four elements equal to F. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set1_ps (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_ps1 (float __F)
+{
+ return _mm_set1_ps (__F);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_ss (float const *__P)
+{
+ return _mm_set_ss (*__P);
+}
+
+/* Create a vector with all four elements equal to *P. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load1_ps (float const *__P)
+{
+ return _mm_set1_ps (*__P);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_ps1 (float const *__P)
+{
+ return _mm_load1_ps (__P);
+}
+
+/* Load four SPFP values from P. The address must be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_load_ps (float const *__P)
+{
+ return (__m128) *(__v4sf *)__P;
+}
+
+/* Load four SPFP values from P. The address need not be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadu_ps (float const *__P)
+{
+ return (__m128) __builtin_ia32_loadups (__P);
+}
+
+/* Load four SPFP values in reverse order. The address must be aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_loadr_ps (float const *__P)
+{
+ __v4sf __tmp = *(__v4sf *)__P;
+ return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
+}
+
+/* Create the vector [Z Y X W]. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+ return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
+}
+
+/* Create the vector [W X Y Z]. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+ return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
+}
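+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): _mm_set_ps lists elements from highest to lowest and
+   _mm_setr_ps from lowest to highest, so both vectors below hold 0.0f in
+   element 0 and 3.0f in element 3.  */
+static __inline __m128
+__example_element_order (void)
+{
+  __m128 __a = _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f);
+  __m128 __b = _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f);
+  return _mm_sub_ps (__a, __b);   /* all four differences are zero */
+}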
+
+/* Stores the lower SPFP value. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_ss (float *__P, __m128 __A)
+{
+ *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE float __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_cvtss_f32 (__m128 __A)
+{
+ return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+/* Store four SPFP values. The address must be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_ps (float *__P, __m128 __A)
+{
+ *(__v4sf *)__P = (__v4sf)__A;
+}
+
+/* Store four SPFP values. The address need not be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storeu_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_storeups (__P, (__v4sf)__A);
+}
+
+/* Store the lower SPFP value across four words. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store1_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
+ _mm_storeu_ps (__P, __tmp);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_store_ps1 (float *__P, __m128 __A)
+{
+ _mm_store1_ps (__P, __A);
+}
+
+/* Store four SPFP values in reverse order. The address must be aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_storer_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
+ _mm_store_ps (__P, __tmp);
+}
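+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): a typical streaming kernel built from the unaligned
+   load/store forms above; __n is assumed to be a multiple of 4.  */
+static __inline void
+__example_scale_array (float *__dst, float const *__src, float __s, int __n)
+{
+  int __i;
+  __m128 __vs = _mm_set1_ps (__s);
+  for (__i = 0; __i < __n; __i += 4)
+    _mm_storeu_ps (__dst + __i,
+                   _mm_mul_ps (_mm_loadu_ps (__src + __i), __vs));
+}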
+
+/* Sets the low SPFP value of A from the low value of B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m128 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Extracts one of the four words of A. The selector N must be immediate. */
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+ return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pextrw (__m64 const __A, int const __N)
+{
+ return _mm_extract_pi16 (__A, __N);
+}
+#else
+#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N))
+#define _m_pextrw(A, N) _mm_extract_pi16((A), (N))
+#endif
+
+/* Inserts word D into one of four words of A. The selector N must be
+ immediate. */
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+ return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{
+ return _mm_insert_pi16 (__A, __D, __N);
+}
+#else
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N)))
+#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N))
+#endif
+
+/* Compute the element-wise maximum of signed 16-bit values. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+ return _mm_max_pi16 (__A, __B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+ return _mm_max_pu8 (__A, __B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pminsw (__m64 __A, __m64 __B)
+{
+ return _mm_min_pi16 (__A, __B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pminub (__m64 __A, __m64 __B)
+{
+ return _mm_min_pu8 (__A, __B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_movemask_pi8 (__m64 __A)
+{
+ return __builtin_ia32_pmovmskb ((__v8qi)__A);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE int __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmovmskb (__m64 __A)
+{
+ return _mm_movemask_pi8 (__A);
+}
+
+/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
+ in B and produce the high 16 bits of the 32-bit results. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+ return _mm_mulhi_pu16 (__A, __B);
+}
+
+/* Return a combination of the four 16-bit values in A. The selector
+ must be an immediate. */
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_shuffle_pi16 (__m64 __A, int __N)
+{
+ return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pshufw (__m64 __A, int __N)
+{
+ return _mm_shuffle_pi16 (__A, __N);
+}
+#else
+#define _mm_shuffle_pi16(A, N) \
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N)))
+#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N))
+#endif
+
+/* Conditionally store byte elements of A into P. The high bit of each
+ byte in the selector N determines whether the corresponding byte from
+ A is stored. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
+ __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+ _mm_maskmove_si64 (__A, __N, __P);
+}
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pavgb (__m64 __A, __m64 __B)
+{
+ return _mm_avg_pu8 (__A, __B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_pavgw (__m64 __A, __m64 __B)
+{
+ return _mm_avg_pu16 (__A, __B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+ values in A and B. Return the value in the lower 16-bit word; the
+ upper words are cleared. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE __m64 __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_m_psadbw (__m64 __A, __m64 __B)
+{
+ return _mm_sad_pu8 (__A, __B);
+}
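+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): the packed SAD leaves its 16-bit sum in the low word of
+   the __m64 result, which _mm_cvtsi64_si32 from <mmintrin.h> extracts.  */
+static __inline int
+__example_block_sad (__m64 __A, __m64 __B)
+{
+  return _mm_cvtsi64_si32 (_mm_sad_pu8 (__A, __B));
+}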
+
+/* Loads one cache line from address P to a location "closer" to the
+ processor. The selector I specifies the type of prefetch operation. */
+#if 0
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_prefetch (void *__P, enum _mm_hint __I)
+{
+ __builtin_prefetch (__P, 0, __I);
+}
+#else
+#define _mm_prefetch(P, I) \
+ __builtin_prefetch ((P), 0, (I))
+#endif
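+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): prefetching a fixed distance (here 256 bytes) ahead of
+   the current position only affects timing, never the computed sums.
+   __n is assumed to be a multiple of 4.  */
+static __inline __m128
+__example_sum_with_prefetch (float const *__p, int __n)
+{
+  int __i;
+  __m128 __acc = _mm_setzero_ps ();
+  for (__i = 0; __i < __n; __i += 4)
+    {
+      _mm_prefetch ((char const *)(__p + __i) + 256, _MM_HINT_T0);
+      __acc = _mm_add_ps (__acc, _mm_loadu_ps (__p + __i));
+    }
+  return __acc;   /* four partial sums; reduce as needed */
+}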
+
+/* Stores the data in A to the address P without polluting the caches. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ __builtin_ia32_movntq (__P, __A);
+}
+
+/* Likewise. The address must be 16-byte aligned. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_stream_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_movntps (__P, (__v4sf)__A);
+}
+
+/* Guarantees that every preceding store is globally visible before
+ any subsequent store. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_sfence (void)
+{
+ __builtin_ia32_sfence ();
+}
+
+/* The execution of the next instruction is delayed by an
+   implementation-specific amount of time.  The instruction does not modify
+   the architectural state. */
+/* APPLE LOCAL begin radar 5618945 */
+__STATIC_INLINE void __attribute__((__always_inline__))
+/* APPLE LOCAL end radar 5618945 */
+_mm_pause (void)
+{
+ __asm__ __volatile__ ("rep; nop" : : );
+}
+/* APPLE LOCAL end radar 4152603 */
+
+/* Transpose the 4x4 matrix composed of row[0-3]. */
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+do { \
+ __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
+ __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \
+ __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \
+ __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \
+ __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \
+ (row0) = __builtin_ia32_movlhps (__t0, __t1); \
+ (row1) = __builtin_ia32_movhlps (__t1, __t0); \
+ (row2) = __builtin_ia32_movlhps (__t2, __t3); \
+ (row3) = __builtin_ia32_movhlps (__t3, __t2); \
+} while (0)
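+
+/* Editorial usage sketch (not part of the original header; the helper name
+   is illustrative): transpose a 4x4 row-major matrix in place; __m is
+   assumed to be 16-byte aligned for the aligned load/store forms.  */
+static __inline void
+__example_transpose4 (float *__m)
+{
+  __m128 __row0 = _mm_load_ps (__m + 0);
+  __m128 __row1 = _mm_load_ps (__m + 4);
+  __m128 __row2 = _mm_load_ps (__m + 8);
+  __m128 __row3 = _mm_load_ps (__m + 12);
+  _MM_TRANSPOSE4_PS (__row0, __row1, __row2, __row3);
+  _mm_store_ps (__m + 0, __row0);
+  _mm_store_ps (__m + 4, __row1);
+  _mm_store_ps (__m + 8, __row2);
+  _mm_store_ps (__m + 12, __row3);
+}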
+
+/* APPLE LOCAL begin nodebug inline 4152603 */
+#undef __always_inline__
+/* APPLE LOCAL end nodebug inline 4152603 */
+
+/* For backward source compatibility. */
+#include <emmintrin.h>
+
+#endif /* __SSE__ */
+#endif /* _XMMINTRIN_H_INCLUDED */